001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package org.conqat.lib.commons.io;
018
019import java.io.ByteArrayInputStream;
020import java.io.ByteArrayOutputStream;
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.List;
025import java.util.OptionalInt;
026import java.util.OptionalLong;
027import java.util.zip.GZIPInputStream;
028import java.util.zip.GZIPOutputStream;
029
030import org.conqat.lib.commons.assertion.CCSMAssert;
031import org.conqat.lib.commons.filesystem.FileSystemUtils;
032import org.conqat.lib.commons.string.StringUtils;
033
034/**
035 * Utility methods for dealing with raw byte arrays. This is located in the I/O
036 * package, as the typical application for these methods is binary I/O on byte
037 * array level.
038 */
039public class ByteArrayUtils {
040
041        /**
042         * Converts an integer value to a byte array. The returned array has a length of
043         * {@link Integer#BYTES}.
044         */
045        public static byte[] intToByteArray(int value) {
046                byte[] bytes = new byte[Integer.BYTES];
047                storeIntInStartOfArray(value, bytes);
048                return bytes;
049        }
050
051        /** Stores the given int at the first 4 bytes of the array. */
052        public static void storeIntInStartOfArray(int value, byte[] bytes) {
053                bytes[0] = (byte) (value >> 24);
054                bytes[1] = (byte) (value >> 16);
055                bytes[2] = (byte) (value >> 8);
056                bytes[3] = (byte) (value);
057        }
058
059        /**
060         * Converts a double value to a byte array. The returned array has a length of
061         * {@link Double#BYTES}
062         */
063        public static byte[] doubleToByteArray(double value) {
064                long longBits = Double.doubleToRawLongBits(value);
065                return ByteArrayUtils.longToByteArray(longBits);
066        }
067
068        /**
069         * Converts a long value to a byte array. The returned array has a length of
070         * {@link Long#BYTES}
071         */
072        public static byte[] longToByteArray(long value) {
073                byte[] bytes = new byte[Long.BYTES];
074                bytes[0] = (byte) (value >> 56);
075                bytes[1] = (byte) (value >> 48);
076                bytes[2] = (byte) (value >> 40);
077                bytes[3] = (byte) (value >> 32);
078                bytes[4] = (byte) (value >> 24);
079                bytes[5] = (byte) (value >> 16);
080                bytes[6] = (byte) (value >> 8);
081                bytes[7] = (byte) (value);
082                return bytes;
083        }
084
085        /**
086         * Converts a byte array to an integer value.
087         * 
088         * Overall, this method is only guaranteed to work if the input array was
089         * created by {@link #intToByteArray(int)}.
090         */
091        public static int byteArrayToInt(byte[] bytes) {
092                CCSMAssert.isTrue(bytes.length == Integer.BYTES, "bytes.length must be 4");
093                return readIntFromStartOfArray(bytes);
094        }
095
096        /**
097         * Converts a byte array to an optional int value, by mapping a null input array
098         * to empty.
099         */
100        public static OptionalInt byteArrayToOptionalInt(byte[] bytes) {
101                if (bytes == null) {
102                        return OptionalInt.empty();
103                }
104                return OptionalInt.of(byteArrayToInt(bytes));
105        }
106
107        /**
108         * Reads an int stored with {@link #storeIntInStartOfArray(int, byte[])} from
109         * the first 4 bytes of the array.
110         */
111        public static int readIntFromStartOfArray(byte[] bytes) {
112                int value = 0;
113                value |= unsignedByte(bytes[0]) << 24;
114                value |= unsignedByte(bytes[1]) << 16;
115                value |= unsignedByte(bytes[2]) << 8;
116                value |= unsignedByte(bytes[3]);
117                return value;
118        }
119
120        /**
121         * Converts a byte array to a double value.
122         * 
123         * Overall, this method is only guaranteed to work if the input array was
124         * created by {@link #doubleToByteArray(double)}.
125         */
126        public static double byteArrayToDouble(byte[] value) {
127                long longBits = ByteArrayUtils.byteArrayToLong(value);
128                return Double.longBitsToDouble(longBits);
129        }
130
131        /**
132         * Converts a byte array to a long value.
133         * 
134         * Overall, this method is only guaranteed to work if the input array was
135         * created by {@link #longToByteArray(long)}.
136         */
137        public static long byteArrayToLong(byte[] bytes) {
138                CCSMAssert.isTrue(bytes.length == Long.BYTES, "bytes.length must be 8");
139                long value = 0L;
140                value |= unsignedByteAsLong(bytes[0]) << 56;
141                value |= unsignedByteAsLong(bytes[1]) << 48;
142                value |= unsignedByteAsLong(bytes[2]) << 40;
143                value |= unsignedByteAsLong(bytes[3]) << 32;
144                value |= unsignedByteAsLong(bytes[4]) << 24;
145                value |= unsignedByteAsLong(bytes[5]) << 16;
146                value |= unsignedByteAsLong(bytes[6]) << 8;
147                value |= unsignedByteAsLong(bytes[7]);
148                return value;
149        }
150
151        /**
152         * Converts a byte array to an optional long value, by mapping a null input
153         * array to empty.
154         */
155        public static OptionalLong byteArrayToOptionalLong(byte[] bytes) {
156                if (bytes == null) {
157                        return OptionalLong.empty();
158                }
159                return OptionalLong.of(byteArrayToLong(bytes));
160        }
161
162        /**
163         * Decompresses a single byte[] using GZIP. A null input array will cause this
164         * method to return null.
165         * 
166         * @throws IOException
167         *             if the input array is not valid GZIP compressed data (as created
168         *             by {@link #compress(byte[])}).
169         */
170        public static byte[] decompress(byte[] value) throws IOException {
171                if (value == null) {
172                        return null;
173                }
174
175                ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length);
176                ByteArrayInputStream bis = new ByteArrayInputStream(value);
177                GZIPInputStream gzis = new GZIPInputStream(bis);
178
179                FileSystemUtils.copy(gzis, bos);
180
181                // it does not matter if we close in case of exceptions, as these are
182                // in-memory resources
183                gzis.close();
184                bos.close();
185
186                return bos.toByteArray();
187        }
188
189        /**
190         * Compresses a single byte[] using GZIP. A null input array will cause this
191         * method to return null.
192         */
193        public static byte[] compress(byte[] value) {
194                if (value == null) {
195                        return null;
196                }
197
198                ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length);
199                try {
200                        GZIPOutputStream gzos = new GZIPOutputStream(bos);
201                        gzos.write(value);
202
203                        // it does not matter if we close in case of exceptions, as this is
204                        // an in-memory resource
205                        gzos.close();
206                } catch (IOException e) {
207                        throw new AssertionError("Can not happen as we work in memory: " + e.getMessage());
208                }
209
210                return bos.toByteArray();
211        }
212
213        /** Returns whether the prefix is a prefix of the given key. */
214        public static boolean isPrefix(byte[] prefix, byte[] key) {
215                return isPrefix(prefix, key, 0);
216        }
217
218        /**
219         * Returns whether the <code>prefix</code> is a prefix of the given
220         * <code>key</code> when only looking at the part of <code>key</code> starting
221         * at <code>startIndex</code>.
222         */
223        public static boolean isPrefix(byte[] prefix, byte[] key, int startIndex) {
224
225                if (key.length - startIndex < prefix.length) {
226                        return false;
227                }
228                for (int i = 0; i < prefix.length; ++i) {
229                        if (prefix[i] != key[i + startIndex]) {
230                                return false;
231                        }
232                }
233                return true;
234        }
235
236        /** Returns true if a1 is (lexicographically) less than a2. */
237        public static boolean isLess(byte[] a1, byte[] a2, boolean resultIfEqual) {
238                int limit = Math.min(a1.length, a2.length);
239                for (int i = 0; i < limit; ++i) {
240                        if (unsignedByte(a1[i]) < unsignedByte(a2[i])) {
241                                return true;
242                        }
243                        if (unsignedByte(a1[i]) > unsignedByte(a2[i])) {
244                                return false;
245                        }
246                }
247
248                if (a1.length < a2.length) {
249                        return true;
250                }
251                if (a1.length > a2.length) {
252                        return false;
253                }
254
255                return resultIfEqual;
256        }
257
258        /** Returns the unsigned byte interpretation of the parameter. */
259        public static int unsignedByte(byte b) {
260                return b & 0xff;
261        }
262
263        /** Returns the unsigned byte interpretation of the parameter as long. */
264        public static long unsignedByteAsLong(byte b) {
265                return b & 0xffL;
266        }
267
268        /** Returns the concatenation of the given arrays. */
269        public static byte[] concat(byte[]... arrays) {
270                return concat(Arrays.asList(arrays));
271        }
272
273        /** Returns the concatenation of the given arrays. */
274        public static byte[] concat(Iterable<byte[]> arrays) {
275                int length = 0;
276                for (byte[] array : arrays) {
277                        length += array.length;
278                }
279
280                byte[] result = new byte[length];
281                int start = 0;
282                for (byte[] array : arrays) {
283                        System.arraycopy(array, 0, result, start, array.length);
284                        start += array.length;
285                }
286                return result;
287        }
288
289        /**
290         * Creates a hex dump of the provided bytes. This is similar to output from
291         * hexdump tools and primarily used for debugging. The output string will
292         * contain in each line 16 bytes of data first printed as hex numbers and then
293         * as a string interpretation. Each line is also prefixed with an offset.
294         */
295        public static String hexDump(byte[] data) {
296                return hexDump(data, 16);
297        }
298
299        /**
300         * Creates a hex dump of the provided bytes. This is similar to output from
301         * hexdump tools and primarily used for debugging. The output string will
302         * contain in each line <code>width</code> bytes of data first printed as hex
303         * numbers and then as a string interpretation. Each line is also prefixed with
304         * an offset.
305         */
306        public static String hexDump(byte[] data, int width) {
307                CCSMAssert.isTrue(width >= 1, "Width must be positive!");
308
309                StringBuilder builder = new StringBuilder();
310                for (int i = 0; i < data.length; i += width) {
311                        hexDumpAppendLine(data, i, Math.min(data.length, i + width), width, builder);
312                }
313                return builder.toString();
314        }
315
316        /**
317         * Appends a single line to the hex dump for {@link #hexDump(byte[], int)}. The
318         * start is inclusive, the end is exclusive.
319         */
320        private static void hexDumpAppendLine(byte[] data, int startOffset, int endOffset, int width,
321                        StringBuilder builder) {
322                builder.append(String.format("%06d: ", startOffset));
323                for (int i = startOffset; i < endOffset; ++i) {
324                        builder.append(String.format("%02x ", data[i]));
325                }
326
327                if (endOffset - startOffset < width) {
328                        builder.append(StringUtils.fillString((width - (endOffset - startOffset)) * 3, StringUtils.SPACE_CHAR));
329                }
330
331                builder.append(StringUtils.SPACE_CHAR);
332                for (int i = startOffset; i < endOffset; ++i) {
333                        boolean isInPrintableAsciiRange = (33 <= data[i] && data[i] <= 126);
334                        if (isInPrintableAsciiRange) {
335                                builder.append((char) data[i]);
336                        } else {
337                                builder.append('.');
338                        }
339                }
340
341                builder.append(StringUtils.LINE_SEPARATOR);
342        }
343
344        /**
345         * Returns whether the given bytes start with the
346         * <a href="http://en.wikipedia.org/wiki/Zip_%28file_format%29#File_headers"
347         * >magic bytes</a> that mark a ZIP file.
348         */
349        public static boolean startsWithZipMagicBytes(byte[] data) {
350                return isPrefix(new byte[] { 0x50, 0x4b, 0x03, 0x04 }, data);
351        }
352
353        /**
354         * Returns the first index in <code>searchIn</code> at or after the start index
355         * containing <code>searchFor</code> (or -1 if not found).
356         */
357        public static int indexOf(byte[] searchFor, byte[] searchIn, int startIndex) {
358                return indexOf(searchFor, searchIn, startIndex, searchIn.length);
359        }
360
361        /**
362         * Returns the first index in <code>searchIn</code> at or after the start index
363         * containing <code>searchFor</code> (or -1 if not found). endIndex is the index
364         * of the first byte that is not considered in the match (exclusive).
365         */
366        public static int indexOf(byte[] searchFor, byte[] searchIn, int startIndex, int endIndex) {
367                if (startIndex + searchFor.length >= endIndex) {
368                        return -1;
369                }
370
371                for (int i = startIndex; i <= endIndex - searchFor.length; ++i) {
372                        if (isPrefix(searchFor, searchIn, i)) {
373                                return i;
374                        }
375                }
376
377                return -1;
378        }
379
380        /**
381         * Perform a split with no limit according to
382         * {@link #split(byte[], byte[], int)}.
383         */
384        public static List<byte[]> split(byte[] bytes, byte[] separatorBytes) {
385                return split(bytes, separatorBytes, Integer.MAX_VALUE);
386        }
387
388        /**
389         * Splits the byte array at the separator bytes given maximum split amount of
390         * times. The result {@link ArrayList} is never longer than the maximum split.
391         *
392         * @param bytes
393         *            The bytes to split. An empty list is returned if this is null.
394         * @param separatorBytes
395         *            Non null array of bytes to split at. Must have positive length.
396         * @param maxSplits
397         *            the maximum number of splits to perform starting at the beginning
398         *            of the bytes array. An empty list is returned if this is 0 or
399         *            negative.
400         */
401        public static List<byte[]> split(byte[] bytes, byte[] separatorBytes, int maxSplits) {
402                CCSMAssert.isNotNull(separatorBytes, "Separator bytes for byte array split can't be null.");
403                CCSMAssert.isTrue(separatorBytes.length > 0, "Separator bytes array must have positive length.");
404
405                List<byte[]> result = new ArrayList<>();
406
407                if (maxSplits <= 0 || bytes == null) {
408                        return result;
409                }
410
411                int start = 0;
412
413                for (int i = 0; i < bytes.length; i++) {
414                        if (result.size() == maxSplits - 1) {
415                                break;
416                        }
417
418                        if (ByteArrayUtils.isPrefix(separatorBytes, bytes, i)) {
419                                result.add(Arrays.copyOfRange(bytes, start, i));
420                                i += separatorBytes.length;
421                                start = i;
422                        }
423                }
424
425                result.add(Arrays.copyOfRange(bytes, start, bytes.length));
426
427                return result;
428        }
429}