/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package org.conqat.lib.commons.io;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.filesystem.FileSystemUtils;
import org.conqat.lib.commons.string.StringUtils;

/**
 * Utility methods for dealing with raw byte arrays. This is located in the I/O
 * package, as the typical application for these methods is binary I/O on byte
 * array level.
 *
 * All multi-byte conversions in this class store the most significant byte
 * first.
 */
public class ByteArrayUtils {

    /**
     * Converts an integer value to a byte array. The returned array has a length
     * of {@link Integer#BYTES} and holds the most significant byte at index 0.
     */
    public static byte[] intToByteArray(int value) {
        byte[] bytes = new byte[Integer.BYTES];
        storeIntInStartOfArray(value, bytes);
        return bytes;
    }

    /**
     * Stores the given int in the first 4 bytes of the array (most significant
     * byte first). The array must have at least 4 elements; remaining elements
     * are left untouched.
     */
    public static void storeIntInStartOfArray(int value, byte[] bytes) {
        bytes[0] = (byte) (value >> 24);
        bytes[1] = (byte) (value >> 16);
        bytes[2] = (byte) (value >> 8);
        bytes[3] = (byte) (value);
    }

    /**
     * Converts a double value to a byte array. The returned array has a length of
     * {@link Double#BYTES} and contains the raw IEEE 754 bit pattern as obtained
     * from {@link Double#doubleToRawLongBits(double)}.
     */
    public static byte[] doubleToByteArray(double value) {
        return longToByteArray(Double.doubleToRawLongBits(value));
    }

    /**
     * Converts a long value to a byte array. The returned array has a length of
     * {@link Long#BYTES} and holds the most significant byte at index 0.
     */
    public static byte[] longToByteArray(long value) {
        byte[] bytes = new byte[Long.BYTES];
        bytes[0] = (byte) (value >> 56);
        bytes[1] = (byte) (value >> 48);
        bytes[2] = (byte) (value >> 40);
        bytes[3] = (byte) (value >> 32);
        bytes[4] = (byte) (value >> 24);
        bytes[5] = (byte) (value >> 16);
        bytes[6] = (byte) (value >> 8);
        bytes[7] = (byte) (value);
        return bytes;
    }

    /**
     * Converts a byte array to an integer value. The array is checked (via
     * {@link CCSMAssert}) to contain exactly 4 bytes.
     *
     * Overall, this method is only guaranteed to work if the input array was
     * created by {@link #intToByteArray(int)}.
     */
    public static int byteArrayToInt(byte[] bytes) {
        CCSMAssert.isTrue(bytes.length == Integer.BYTES, "bytes.length must be 4");
        return readIntFromStartOfArray(bytes);
    }

    /**
     * Converts a byte array to an optional int value, by mapping a null input
     * array to empty. Non-null arrays are converted with
     * {@link #byteArrayToInt(byte[])}.
     */
    public static OptionalInt byteArrayToOptionalInt(byte[] bytes) {
        if (bytes == null) {
            return OptionalInt.empty();
        }
        return OptionalInt.of(byteArrayToInt(bytes));
    }

    /**
     * Reads an int stored with {@link #storeIntInStartOfArray(int, byte[])} from
     * the first 4 bytes of the array. Any bytes after the first 4 are ignored.
     */
    public static int readIntFromStartOfArray(byte[] bytes) {
        // each byte is widened without sign extension before being shifted into place
        return (unsignedByte(bytes[0]) << 24) | (unsignedByte(bytes[1]) << 16) | (unsignedByte(bytes[2]) << 8)
                | unsignedByte(bytes[3]);
    }

    /**
     * Converts a byte array to a double value.
     *
     * Overall, this method is only guaranteed to work if the input array was
     * created by {@link #doubleToByteArray(double)}.
     */
    public static double byteArrayToDouble(byte[] value) {
        return Double.longBitsToDouble(byteArrayToLong(value));
    }

    /**
     * Converts a byte array to a long value. The array is checked (via
     * {@link CCSMAssert}) to contain exactly 8 bytes.
     *
     * Overall, this method is only guaranteed to work if the input array was
     * created by {@link #longToByteArray(long)}.
     */
    public static long byteArrayToLong(byte[] bytes) {
        CCSMAssert.isTrue(bytes.length == Long.BYTES, "bytes.length must be 8");
        long value = 0L;
        // shift the accumulated value up by one byte and append the next byte
        for (int i = 0; i < Long.BYTES; ++i) {
            value = (value << 8) | unsignedByteAsLong(bytes[i]);
        }
        return value;
    }

    /**
     * Converts a byte array to an optional long value, by mapping a null input
     * array to empty. Non-null arrays are converted with
     * {@link #byteArrayToLong(byte[])}.
     */
    public static OptionalLong byteArrayToOptionalLong(byte[] bytes) {
        if (bytes == null) {
            return OptionalLong.empty();
        }
        return OptionalLong.of(byteArrayToLong(bytes));
    }

    /**
     * Decompresses a single byte[] using GZIP. A null input array will cause this
     * method to return null.
     *
     * @throws IOException
     *             if the input array is not valid GZIP compressed data (as
     *             created by {@link #compress(byte[])}).
     */
    public static byte[] decompress(byte[] value) throws IOException {
        if (value == null) {
            return null;
        }

        ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length);
        // try-with-resources closes the GZIP stream (and releases whatever it
        // holds) even if the data is corrupt and copying fails
        try (GZIPInputStream gzis = new GZIPInputStream(new ByteArrayInputStream(value))) {
            FileSystemUtils.copy(gzis, bos);
        }
        return bos.toByteArray();
    }

    /**
     * Compresses a single byte[] using GZIP. A null input array will cause this
     * method to return null.
     */
    public static byte[] compress(byte[] value) {
        if (value == null) {
            return null;
        }

        ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length);
        // closing the GZIP stream writes the trailer, so it has to happen before
        // the bytes are extracted below
        try (GZIPOutputStream gzos = new GZIPOutputStream(bos)) {
            gzos.write(value);
        } catch (IOException e) {
            // keep the original exception as cause so it is not lost
            throw new AssertionError("Can not happen as we work in memory: " + e.getMessage(), e);
        }

        return bos.toByteArray();
    }

    /** Returns whether the prefix is a prefix of the given key. */
    public static boolean isPrefix(byte[] prefix, byte[] key) {
        return isPrefix(prefix, key, 0);
    }

    /**
     * Returns whether the <code>prefix</code> is a prefix of the given
     * <code>key</code> when only looking at the part of <code>key</code> starting
     * at <code>startIndex</code>. Returns false if the remaining part of
     * <code>key</code> is shorter than <code>prefix</code>.
     */
    public static boolean isPrefix(byte[] prefix, byte[] key, int startIndex) {
        if (key.length - startIndex < prefix.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; ++i) {
            if (prefix[i] != key[i + startIndex]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if a1 is (lexicographically) less than a2. Bytes are compared
     * as unsigned values; if one array is a proper prefix of the other, the
     * shorter one is the smaller one.
     *
     * @param resultIfEqual
     *            the value returned if both arrays have identical content.
     */
    public static boolean isLess(byte[] a1, byte[] a2, boolean resultIfEqual) {
        int limit = Math.min(a1.length, a2.length);
        for (int i = 0; i < limit; ++i) {
            int left = unsignedByte(a1[i]);
            int right = unsignedByte(a2[i]);
            if (left != right) {
                return left < right;
            }
        }

        // common prefix is identical, so the length decides
        if (a1.length != a2.length) {
            return a1.length < a2.length;
        }
        return resultIfEqual;
    }

    /** Returns the unsigned byte interpretation of the parameter. */
    public static int unsignedByte(byte b) {
        return b & 0xff;
    }

    /** Returns the unsigned byte interpretation of the parameter as long. */
    public static long unsignedByteAsLong(byte b) {
        return b & 0xffL;
    }

    /** Returns the concatenation of the given arrays. */
    public static byte[] concat(byte[]... arrays) {
        return concat(Arrays.asList(arrays));
    }

    /**
     * Returns the concatenation of the given arrays. The iterable is traversed
     * twice (once to determine the overall length, once to copy the data).
     */
    public static byte[] concat(Iterable<byte[]> arrays) {
        int length = 0;
        for (byte[] array : arrays) {
            length += array.length;
        }

        byte[] result = new byte[length];
        int start = 0;
        for (byte[] array : arrays) {
            System.arraycopy(array, 0, result, start, array.length);
            start += array.length;
        }
        return result;
    }

    /**
     * Creates a hex dump of the provided bytes. This is similar to output from
     * hexdump tools and primarily used for debugging. The output string will
     * contain in each line 16 bytes of data first printed as hex numbers and then
     * as a string interpretation. Each line is also prefixed with an offset.
     */
    public static String hexDump(byte[] data) {
        return hexDump(data, 16);
    }

    /**
     * Creates a hex dump of the provided bytes. This is similar to output from
     * hexdump tools and primarily used for debugging. The output string will
     * contain in each line <code>width</code> bytes of data first printed as hex
     * numbers and then as a string interpretation. Each line is also prefixed with
     * an offset (printed as decimal number).
     */
    public static String hexDump(byte[] data, int width) {
        CCSMAssert.isTrue(width >= 1, "Width must be positive!");

        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < data.length; i += width) {
            hexDumpAppendLine(data, i, Math.min(data.length, i + width), width, builder);
        }
        return builder.toString();
    }

    /**
     * Appends a single line to the hex dump for {@link #hexDump(byte[], int)}. The
     * start is inclusive, the end is exclusive. Bytes outside of the range 33 to
     * 126 (this includes the space character) are shown as '.' in the string
     * interpretation.
     */
    private static void hexDumpAppendLine(byte[] data, int startOffset, int endOffset, int width,
            StringBuilder builder) {
        builder.append(String.format("%06d: ", startOffset));
        for (int i = startOffset; i < endOffset; ++i) {
            builder.append(String.format("%02x ", data[i]));
        }

        // pad an incomplete (last) line, so the string column stays aligned
        int bytesInLine = endOffset - startOffset;
        if (bytesInLine < width) {
            builder.append(StringUtils.fillString((width - bytesInLine) * 3, StringUtils.SPACE_CHAR));
        }

        builder.append(StringUtils.SPACE_CHAR);
        for (int i = startOffset; i < endOffset; ++i) {
            boolean isInPrintableAsciiRange = (33 <= data[i] && data[i] <= 126);
            if (isInPrintableAsciiRange) {
                builder.append((char) data[i]);
            } else {
                builder.append('.');
            }
        }

        builder.append(StringUtils.LINE_SEPARATOR);
    }

    /**
     * Returns whether the given bytes start with the
     * <a href="http://en.wikipedia.org/wiki/Zip_%28file_format%29#File_headers"
     * >magic bytes</a> that mark a ZIP file.
     */
    public static boolean startsWithZipMagicBytes(byte[] data) {
        return isPrefix(new byte[] { 0x50, 0x4b, 0x03, 0x04 }, data);
    }

    /**
     * Returns the first index in <code>searchIn</code> at or after the start index
     * containing <code>searchFor</code> (or -1 if not found).
     */
    public static int indexOf(byte[] searchFor, byte[] searchIn, int startIndex) {
        return indexOf(searchFor, searchIn, startIndex, searchIn.length);
    }

    /**
     * Returns the first index in <code>searchIn</code> at or after the start index
     * containing <code>searchFor</code> (or -1 if not found). endIndex is the index
     * of the first byte that is not considered in the match (exclusive). A match
     * that ends exactly at <code>endIndex</code> is found.
     */
    public static int indexOf(byte[] searchFor, byte[] searchIn, int startIndex, int endIndex) {
        // last position at which searchFor still fits completely before endIndex;
        // if this is smaller than startIndex, the loop body is never entered
        int lastCandidate = endIndex - searchFor.length;
        for (int i = startIndex; i <= lastCandidate; ++i) {
            if (isPrefix(searchFor, searchIn, i)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Perform a split with no limit according to
     * {@link #split(byte[], byte[], int)}.
     */
    public static List<byte[]> split(byte[] bytes, byte[] separatorBytes) {
        return split(bytes, separatorBytes, Integer.MAX_VALUE);
    }

    /**
     * Splits the byte array at the separator bytes. The result list never has
     * more than <code>maxSplits</code> elements; once this limit is reached, the
     * last element contains the unsplit remainder (including any further
     * separators). Adjacent separators produce empty arrays in the result.
     *
     * @param bytes
     *            The bytes to split. An empty list is returned if this is null.
     * @param separatorBytes
     *            Non null array of bytes to split at. Must have positive length.
     * @param maxSplits
     *            the maximum number of parts to return, splitting from the
     *            beginning of the bytes array. An empty list is returned if this
     *            is 0 or negative.
     */
    public static List<byte[]> split(byte[] bytes, byte[] separatorBytes, int maxSplits) {
        CCSMAssert.isNotNull(separatorBytes, "Separator bytes for byte array split can't be null.");
        CCSMAssert.isTrue(separatorBytes.length > 0, "Separator bytes array must have positive length.");

        List<byte[]> result = new ArrayList<>();
        if (maxSplits <= 0 || bytes == null) {
            return result;
        }

        int start = 0;
        int i = 0;
        // stop early when only one slot is left, as this is used for the remainder
        while (i < bytes.length && result.size() < maxSplits - 1) {
            if (isPrefix(separatorBytes, bytes, i)) {
                result.add(Arrays.copyOfRange(bytes, start, i));
                // continue directly after the separator; this position must be
                // checked as well, as another separator may start right there
                i += separatorBytes.length;
                start = i;
            } else {
                i++;
            }
        }

        result.add(Arrays.copyOfRange(bytes, start, bytes.length));
        return result;
    }
}