001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 The ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package org.conqat.lib.commons.string; 018 019import java.io.ByteArrayInputStream; 020import java.io.ByteArrayOutputStream; 021import java.io.IOException; 022import java.io.InputStream; 023import java.io.PrintWriter; 024import java.io.StringWriter; 025import java.nio.charset.Charset; 026import java.nio.charset.StandardCharsets; 027import java.text.NumberFormat; 028import java.util.ArrayList; 029import java.util.Arrays; 030import java.util.Collection; 031import java.util.Collections; 032import java.util.HashMap; 033import java.util.Iterator; 034import java.util.List; 035import java.util.Map; 036import java.util.Map.Entry; 037import java.util.Optional; 038import java.util.Random; 039import java.util.Set; 040import java.util.regex.Matcher; 041import java.util.regex.Pattern; 042import java.util.stream.Collectors; 043 044import org.conqat.lib.commons.algo.Diff; 045import org.conqat.lib.commons.assertion.CCSMAssert; 046import org.conqat.lib.commons.collections.CollectionUtils; 047import org.conqat.lib.commons.collections.Pair; 048import org.conqat.lib.commons.collections.PairList; 049import org.conqat.lib.commons.filesystem.EByteOrderMark; 050import 
org.conqat.lib.commons.filesystem.FileSystemUtils; 051 052/** 053 * A utility class providing some advanced string functionality. 054 */ 055public class StringUtils { 056 057 /** 058 * Matches all whitespace at the beginning of each line. 059 * 060 * We deliberately don't use "\\s" here because this also matches new lines. 061 * Instead we use "\\p{Zs}" which matches all unicode horizontal whitespace 062 * characters. 063 */ 064 private static final Pattern LEADING_WHITESPACE_PATTERN = Pattern.compile("^[\\t\\p{Zs}]+", Pattern.MULTILINE); 065 066 /** Line separator of the current platform. */ 067 public static final String LINE_SEPARATOR = System.getProperty("line.separator"); 068 069 /** Line feed ("\n"), platform independent. */ 070 public static final String LINE_FEED = "\n"; 071 072 /** 073 * The empty string. Use StringUtils.EMPTY_STRING instead of "" (our Java coding 074 * guidelines) 075 * 076 * <ol> 077 * <li>Use <code>StringUtils.EMPTY_STRING</code> instead of an empty string 078 * literal (<code>""</code>) to make it clear that this is intentionally empty. 079 * Using "" may leave the reader in doubt whether this is something unfinished 080 * (e.g. "" used temporally until something meaningful is filled in) or 081 * intentionally the empty String. Making it explicit removes this problem)</li> 082 * <li>On most JVMs, using "" allocates a new String object per class in which 083 * it used. Using EMPTY_STRING re-uses a single object and safes a few bytes. 084 * Note</li> 085 * <li>Makes " " and "" better distinguishable.</li> 086 * </ol> 087 */ 088 public static final String EMPTY_STRING = ""; 089 090 /** A space. */ 091 public static final String SPACE = " "; 092 093 /** A space character. */ 094 public static final char SPACE_CHAR = ' '; 095 096 /** A tab character. */ 097 public static final String TAB = "\t"; 098 099 /** Two spaces. */ 100 public static final String TWO_SPACES = " "; 101 102 /** Dot. 
*/ 103 public static final String DOT = "."; 104 105 /** Number formatter. */ 106 private static NumberFormat numberFormat = NumberFormat.getInstance(); 107 108 /** Number formatter for percentages. */ 109 private static NumberFormat percentageFormat = NumberFormat.getPercentInstance(); 110 111 /** Random number generator. */ 112 private static final Random random = new Random(); 113 114 /** Char strings used to convert bytes to a hex string */ 115 private static final char[] HEX_CHARACTERS = "0123456789ABCDEF".toCharArray(); 116 117 /** 118 * Characters that need to be escaped in markdown. use with 119 * {@link StringUtils#escapeChars(String, List)} and 120 * {@link StringUtils#unEscapeChars(String, List)} 121 */ 122 public static final List<Character> MARKDOWN_ESCAPE_CHARACTERS = Arrays.asList('[', ']', '(', ')', '*', '#', '_', 123 '~', '^', '+', '=', '>'); 124 125 /** 126 * Create a string of the given length and center the given string within it. 127 * Left and right areas are filled by the character provided. 128 * 129 * @param string 130 * The input string. 131 * @param length 132 * The length of the string to be returned. 133 * @param c 134 * The character to surround the input string with. 135 * @return the new string or, if the string is longer than the specified length, 136 * the original string. 137 * @see #flushLeft(String, int, char) 138 * @see #flushRight(String, int, char) 139 */ 140 public static String center(String string, int length, char c) { 141 if (string.length() >= length) { 142 return string; 143 } 144 int strLen = string.length(); 145 int fillLen = (length - strLen) / 2; 146 String leftFiller = fillString(fillLen, c); 147 148 if ((length - strLen) % 2 != 0) { 149 fillLen++; 150 } 151 152 String rightFiller = fillString(fillLen, c); 153 154 return leftFiller + string + rightFiller; 155 } 156 157 /** 158 * Compares two strings both of which may be <code>null</code>. 
A string which 159 * is <code>null</code> is always smaller than the other string, except for both 160 * strings being <code>null</code>. 161 * 162 * @param a 163 * The string which is compared to the second string. 164 * @param b 165 * The string which is compared to the first string. 166 * @return Returns 0 if both strings are <code>null</code>, -1 if only the first 167 * string is <code>null</code>, and 1 if only the second string is 168 * <code>null</code>. If both strings are not <code>null</code>, returns 169 * the result of the usual string comparison. 170 */ 171 public static int compare(String a, String b) { 172 if (a == b) { 173 return 0; 174 } 175 176 if (a == null) { 177 return -1; 178 } 179 180 if (b == null) { 181 return 1; 182 } 183 184 return a.compareTo(b); 185 } 186 187 /** 188 * Concatenates all elements of an iterable using the 189 * <code>toString()</code>-method. 190 * 191 * @param iterable 192 * the iterable 193 * @return a concatenation, separated by spaces 194 */ 195 public static String concat(Iterable<?> iterable) { 196 return concat(iterable, SPACE); 197 } 198 199 /** 200 * Concatenates all elements of an iterable using the 201 * <code>toString()</code>-method, separating them with the given 202 * <code>separator</code>. 203 * 204 * @param iterable 205 * the iterable containing the strings 206 * @param separator 207 * the separator to place between the strings, may be 208 * <code>null</code> 209 * @return a concatenation of the string in the iterable or <code>null</code> if 210 * iterable was <code>null</code>. If the iterable is of size 0 the 211 * empty string is returned. 212 */ 213 public static String concat(Iterable<?> iterable, String separator) { 214 if (iterable == null) { 215 return null; 216 } 217 return concat(iterable.iterator(), separator); 218 } 219 220 /** 221 * Concatenates all elements of an iterator using the 222 * <code>toString()</code>-method, separating them with the given 223 * <code>separator</code>. 
224 * 225 * @param iterator 226 * the {@link Iterator} containing the strings 227 * @param separator 228 * the separator to place between the strings, may be 229 * <code>null</code> 230 * @return a concatenation of the string in the iterator or <code>null</code> if 231 * iterator was <code>null</code>. If the iterator has no elements the 232 * empty string is returned. 233 */ 234 public static String concat(Iterator<?> iterator, String separator) { 235 if (iterator == null) { 236 return null; 237 } 238 if (!iterator.hasNext()) { 239 return EMPTY_STRING; 240 } 241 242 if (separator == null) { 243 separator = EMPTY_STRING; 244 } 245 246 StringBuilder builder = new StringBuilder(); 247 248 while (iterator.hasNext()) { 249 builder.append(iterator.next()); 250 if (iterator.hasNext()) { 251 builder.append(separator); 252 } 253 } 254 255 return builder.toString(); 256 } 257 258 /** 259 * Concatenates all elements of an array using the <code>toString()</code> 260 * -method. 261 * 262 * @param array 263 * the array containing the strings 264 * @return a concatenation of the string separated by spaces 265 */ 266 public static String concat(Object[] array) { 267 return concat(array, SPACE); 268 } 269 270 /** 271 * Concatenates all elements of an array using the <code>toString()</code> 272 * -method, separating them with the given <code>separator</code>. 273 * 274 * @param array 275 * the array 276 * @param separator 277 * the separator to place between the strings, may be 278 * <code>null</code> 279 * @return a concatenation of the string in the array or <code>null</code> if 280 * array was <code>null</code>. If array is of length 0 the empty string 281 * is returned. 
282 */ 283 public static String concat(Object[] array, String separator) { 284 if (array == null) { 285 return null; 286 } 287 return concat(Arrays.asList(array), separator); 288 } 289 290 /** 291 * Joins the elements in the given list with the given delimiter, but with a 292 * different delimiter for the very last element. 293 * <p> 294 * This could, for example, be used if you want to create a String "a, b and c" 295 * from a list containing "a", "b" and "c". 296 */ 297 public static String joinDifferentLastDelimiter(List<String> items, String delimiter, String lastDelimiter) { 298 int last = items.size() - 1; 299 return String.join(lastDelimiter, String.join(delimiter, items.subList(0, last)), items.get(last)); 300 } 301 302 /** Concatenate two string arrays. */ 303 public static String[] concat(String[] array1, String[] array2) { 304 String[] result = new String[array1.length + array2.length]; 305 System.arraycopy(array1, 0, result, 0, array1.length); 306 System.arraycopy(array2, 0, result, array1.length, array2.length); 307 return result; 308 } 309 310 /** 311 * Build a string with a specified length from a character. 312 * 313 * @param length 314 * The length of the string. 315 * @param c 316 * The character. 317 * @return The string. 318 */ 319 public static String fillString(int length, char c) { 320 char[] characters = new char[length]; 321 322 Arrays.fill(characters, c); 323 324 return new String(characters); 325 } 326 327 /** 328 * Create a sting of the given length starting with the provided string. 329 * Remaining characters are filled with the provided character. 330 * 331 * @param string 332 * The input string. 333 * @param length 334 * The length of the string to be returned. 335 * @param c 336 * The character to fill the string. 337 * @return the new string or, if the string is longer than the specified length, 338 * the original string. 
339 * @see #flushRight(String, int, char) 340 * @see #center(String, int, char) 341 */ 342 public static String flushLeft(String string, int length, char c) { 343 int gap = length - string.length(); 344 if (gap <= 0) { 345 return string; 346 } 347 return string + StringUtils.fillString(gap, c); 348 } 349 350 /** 351 * Create a sting of the given length ending with the provided string. Remaining 352 * characters are filled with the provided character. 353 * 354 * @param string 355 * The input string. 356 * @param length 357 * The length of the string to be returned. 358 * @param c 359 * The character to fill the string. 360 * @return the new string or, if the string is longer than the specified length, 361 * the original string. 362 * @see #flushLeft(String, int, char) 363 * @see #center(String, int, char) 364 */ 365 public static String flushRight(String string, int length, char c) { 366 int gap = length - string.length(); 367 if (gap <= 0) { 368 return string; 369 } 370 return StringUtils.fillString(gap, c) + string; 371 } 372 373 /** 374 * Format number 375 */ 376 public static String format(Number number) { 377 return numberFormat.format(number); 378 } 379 380 /** 381 * Format as percentage. 382 */ 383 public static String formatAsPercentage(Number number) { 384 return percentageFormat.format(number); 385 } 386 387 /** Returns the first line of a string. */ 388 public static String getFirstLine(String string) { 389 LineSplitter lineSplitter = new LineSplitter(string); 390 return lineSplitter.next(); 391 } 392 393 /** 394 * Returns the first n part of a string, separated by the given character. 395 * 396 * E.g., getStringParts("edu.tum.cs", 2, '.') gives: "edu.tum". 
397 * 398 * @param string 399 * the base string 400 * @param partNumber 401 * number of parts 402 * @param separator 403 * the separator character 404 */ 405 public static String getFirstParts(String string, int partNumber, char separator) { 406 407 if (partNumber < 0 || string == null) { 408 return string; 409 } 410 411 int idx = 0; 412 413 for (int i = 0; i < partNumber; i++) { 414 idx = string.indexOf(separator, idx + 1); 415 if (idx == -1) { 416 return string; 417 } 418 } 419 420 return string.substring(0, idx); 421 } 422 423 /** 424 * Splits a key-value string and stores it in a hash map. The string must have 425 * the following format: 426 * <p> 427 * <code>key=value[,key=value]*</code> 428 * </p> 429 * If the string is <code>null</code> <code>null</code> is returned. 430 * 431 * @param keyValueString 432 * with format described above 433 * @return a hash map containing the key-values-pairs. 434 */ 435 public static HashMap<String, String> getKeyValuePairs(String keyValueString) { 436 if (keyValueString == null) { 437 return null; 438 } 439 HashMap<String, String> result = new HashMap<>(); 440 if (keyValueString.trim().equals(EMPTY_STRING)) { 441 return result; 442 } 443 444 String[] pairs = keyValueString.split(","); 445 446 for (String pair : pairs) { 447 int index = pair.indexOf('='); 448 if (index < 0) { 449 result.put(pair.trim(), null); 450 } else { 451 String key = pair.substring(0, index).trim(); 452 String value = pair.substring(index + 1).trim(); 453 result.put(key, value); 454 } 455 } 456 return result; 457 } 458 459 /** 460 * Returns the first part of a String whose parts are separated by the given 461 * character. 462 * 463 * E.g., getFirstPart("foo@bar@acme", '@') gives "foo". 464 * 465 * @param string 466 * the String 467 * @param separator 468 * separation character 469 * @return the first part of the String, or the original String if the 470 * separation character is not found. 
471 */ 472 public static String getFirstPart(String string, String separator) { 473 int idx = string.indexOf(separator); 474 if (idx >= 0) { 475 return string.substring(0, idx); 476 } 477 return string; 478 } 479 480 /** 481 * Variant of {@link #getFirstPart(String, String)} which accepts a single char 482 * as the separator. 483 * 484 * @see #getFirstPart(String, String) 485 */ 486 public static String getFirstPart(String string, char separator) { 487 return getFirstPart(string, String.valueOf(separator)); 488 } 489 490 /** 491 * Returns the last part of a String whose parts are separated by the given 492 * String. 493 * 494 * E.g., getLastPart("org.conqat##lib.commons.string##StringUtils", "##") gives 495 * "StringUtils". If separator is the empty string, this method returns the 496 * empty string. 497 * 498 * @param string 499 * the String 500 * @param separator 501 * separation String 502 * @return the last part of the String, or the original String if the separation 503 * String is not found. 504 */ 505 public static String getLastPart(String string, String separator) { 506 int idx = string.lastIndexOf(separator); 507 if (idx >= 0) { 508 return string.substring(idx + separator.length()); 509 } 510 return string; 511 } 512 513 /** 514 * Variant of {@link #getLastPart(String, String)} which accepts a single char 515 * as the separator. 516 * 517 * @see #getLastPart(String, String) 518 */ 519 public static String getLastPart(String string, char separator) { 520 return getLastPart(string, String.valueOf(separator)); 521 } 522 523 /** 524 * Splits the string at the last occurence of {@code separator}. If the 525 * separator does not occur, the second string in the returned pair is empty. 526 * E.g. splitAtLast(foo.bar.baz, '.') gives ("foo.bar", "baz). 
527 */ 528 public static Pair<String, String> splitAtLast(String string, char separator) { 529 int idx = string.lastIndexOf(separator); 530 if (idx == -1) { 531 return new Pair<>(string, ""); 532 } 533 return new Pair<>(string.substring(0, idx), string.substring(idx + 1)); 534 } 535 536 /** 537 * Searches the elements of a string array for a string. Strings are trimmed. 538 * 539 * @param array 540 * the array to search 541 * @param string 542 * the search string 543 * @return the index of the element where the string was found or 544 * <code>-1</code> if string wasn't found. 545 */ 546 public static int indexOf(String[] array, String string) { 547 for (int i = 0; i < array.length; i++) { 548 if (array[i].trim().equals(string.trim())) { 549 return i; 550 } 551 } 552 return -1; 553 } 554 555 /** 556 * Checks if a string is empty (after trimming). 557 * 558 * @param text 559 * the string to check. 560 * @return <code>true</code> if string is empty or <code>null</code>, 561 * <code>false</code> otherwise. 562 */ 563 public static boolean isEmpty(String text) { 564 if (text == null) { 565 return true; 566 } 567 return EMPTY_STRING.equals(text.trim()); 568 } 569 570 /** 571 * Checks if the given string contains at least one letter (checked with 572 * {@link Character#isLetter(char)}). 573 */ 574 public static boolean containsLetter(String s) { 575 for (int i = 0; i < s.length(); i++) { 576 if (Character.isLetter(s.charAt(i))) { 577 return true; 578 } 579 } 580 return false; 581 } 582 583 /** Returns whether s1 contains s2 ignoring case */ 584 public static boolean containsIgnoreCase(String s1, String s2) { 585 return s1.toLowerCase().contains(s2.toLowerCase()); 586 } 587 588 /** Returns whether s contains all the given substrings. */ 589 public static boolean containsAll(String s, String... 
substrings) { 590 for (String substring : substrings) { 591 if (!s.contains(substring)) { 592 return false; 593 } 594 } 595 return true; 596 } 597 598 /** 599 * Generates a random string with a certain length. The string consists of 600 * characters with ASCII code between 33 and 126. 601 * 602 * @param length 603 * the length of the random string 604 * @return the random string 605 */ 606 public static String randomString(int length) { 607 return randomString(length, random); 608 } 609 610 /** 611 * Performs the actal creation of the random string using the given randomizer. 612 */ 613 public static String randomString(int length, Random random) { 614 char[] characters = new char[length]; 615 for (int i = 0; i < length; i++) { 616 characters[i] = (char) (random.nextInt(93) + 33); 617 } 618 return new String(characters); 619 } 620 621 /** 622 * Generates an array of random strings. 623 * 624 * @param length 625 * number of strings 626 * @param stringLength 627 * length of each string 628 * @return the randomly generated array. 629 */ 630 public static String[] randomStringArray(int length, int stringLength) { 631 String[] array = new String[length]; 632 for (int i = 0; i < length; i++) { 633 array[i] = randomString(stringLength); 634 } 635 return array; 636 } 637 638 /** 639 * Generates a pseudo random string with a certain length in a deterministic, 640 * reproducable fashion. 641 * 642 * 643 * @param length 644 * the length of the pseudo-random string 645 * @param seed 646 * seed value for the random number generator used for the generation 647 * of the pseudo-random string. If the same seed value is used, the 648 * same pseudo-random string is generated. 649 */ 650 public static String generateString(int length, int seed) { 651 Random seededRandomizer = new Random(seed); 652 return randomString(length, seededRandomizer); 653 } 654 655 /** 656 * Generates an array of pseudo-random strings in a deterministic, reproducable 657 * fashion. 
658 * 659 * @param length 660 * number of strings 661 * @param stringLength 662 * length of each string 663 * @param seed 664 * seed value for the random number generator used for the generation 665 * of the pseudo-random string. If the same seed value is used, the 666 * same pseudo-random string array is generated. 667 * @return the randomly generated array. 668 */ 669 public static String[] generateStringArray(int length, int stringLength, int seed) { 670 String[] array = new String[length]; 671 for (int i = 0; i < length; i++) { 672 array[i] = generateString(stringLength, seed + i); 673 } 674 return array; 675 } 676 677 /** 678 * Returns the beginning of a String, cutting off the last part which is 679 * separated by the given character. 680 * 681 * E.g., removeLastPart("org.conqat.lib.commons.string.StringUtils", '.') gives 682 * "org.conqat.lib.commons.string". 683 * 684 * @param string 685 * the String 686 * @param separator 687 * separation character 688 * @return the String without the last part, or the original string (i.e., the 689 * same object) if the separation character is not found. 690 */ 691 public static String removeLastPart(String string, char separator) { 692 int idx = string.lastIndexOf(separator); 693 if (idx == -1) { 694 return string; 695 } 696 697 return string.substring(0, idx); 698 } 699 700 /** 701 * Replaces all occurrences of keys of the given map in the given string with 702 * the associated value in that map. The given map may be <code>null</code>, in 703 * which case the original string is returned unchanged. 704 * 705 * This method is semantically the same as calling 706 * {@link String#replace(CharSequence, CharSequence)} for each of the entries in 707 * the map, but may be significantly faster for many replacements performed on a 708 * short string, since {@link String#replace(CharSequence, CharSequence)} uses 709 * regular expressions internally and results in many String object allocations 710 * when applied iteratively. 
711 * 712 * The order in which replacements are applied depends on the order of the map's 713 * entry set. 714 */ 715 public static String replaceFromMap(String string, Map<String, String> replacements) { 716 if (replacements == null) { 717 return string; 718 } 719 720 StringBuilder sb = new StringBuilder(string); 721 for (Entry<String, String> entry : replacements.entrySet()) { 722 String key = entry.getKey(); 723 String value = entry.getValue(); 724 725 int start = sb.indexOf(key, 0); 726 while (start > -1) { 727 int end = start + key.length(); 728 int nextSearchStart = start + value.length(); 729 sb.replace(start, end, value); 730 start = sb.indexOf(key, nextSearchStart); 731 } 732 } 733 return sb.toString(); 734 } 735 736 /** 737 * Removes all occurrences of the specified strings from the given string. 738 */ 739 public static String removeAll(String string, String... stringsToRemove) { 740 if (stringsToRemove == null || stringsToRemove.length == 0) { 741 return string; 742 } 743 744 StringBuilder sb = new StringBuilder(string); 745 for (String key : stringsToRemove) { 746 int start = sb.indexOf(key, 0); 747 while (start > -1) { 748 int end = start + key.length(); 749 sb.delete(start, end); 750 start = sb.indexOf(key, start); 751 } 752 } 753 return sb.toString(); 754 } 755 756 /** 757 * Replace all linebreaks in string with the platform-specific line separator 758 * ({@link #LINE_SEPARATOR}). 759 * 760 * @return a string with linebreaks replaced by the line separator of the 761 * current platform 762 */ 763 public static String normalizeLineSeparatorsPlatformSpecific(String string) { 764 return replaceLineBreaks(string, LINE_SEPARATOR); 765 } 766 767 /** 768 * Replace all linebreaks in string with a line feed symbol (\n), platform 769 * independent. 770 * 771 * @return a string with linebreaks replaced by {@link #LINE_FEED}. 
772 */ 773 public static String normalizeLineSeparatorsPlatformIndependent(String string) { 774 return replaceLineBreaks(string, LINE_FEED); 775 } 776 777 /** 778 * Replace all linebreaks in string by a specified symbol. 779 * 780 * @return a string with line breaks replaced. 781 */ 782 public static String replaceLineBreaks(String string, String symbol) { 783 StringBuilder builder = new StringBuilder(); 784 785 LineSplitter lineSplitter = new LineSplitter(string); 786 lineSplitter.setIncludeTrailingEmptyLine(true); 787 788 for (String line : lineSplitter) { 789 builder.append(line); 790 if (lineSplitter.hasNext()) { 791 builder.append(symbol); 792 } 793 } 794 795 return builder.toString(); 796 } 797 798 /** 799 * Split string in lines. For the the empty string and <code>null</code> an 800 * array of length zero is returned. 801 * 802 * @see #splitLinesAsList(String) 803 */ 804 public static String[] splitLines(String content) { 805 List<String> lineList = splitLinesAsList(content); 806 String[] result = new String[lineList.size()]; 807 lineList.toArray(result); 808 return result; 809 } 810 811 /** 812 * Returns the number of occurrences of the given character in the given string. 813 */ 814 public static int countCharacter(String content, char character) { 815 int count = 0; 816 for (char c : content.toCharArray()) { 817 if (c == character) { 818 count++; 819 } 820 } 821 return count; 822 } 823 824 /** Return number of lines in a string. */ 825 public static int countLines(String content) { 826 return countLines(content, false); 827 } 828 829 /** Return number of lines in a string. */ 830 public static int countLines(String content, boolean includeTrailingEmptyLine) { 831 LineSplitter lineSplitter = new LineSplitter(content); 832 lineSplitter.setIncludeTrailingEmptyLine(includeTrailingEmptyLine); 833 int count = 0; 834 while (lineSplitter.next() != null) { 835 count++; 836 } 837 return count; 838 } 839 840 /** 841 * Split string in lines. 
For the the empty string and <code>null</code> an 842 * empty list is returned. 843 * 844 * @see #splitLines(String) 845 */ 846 public static List<String> splitLinesAsList(String content) { 847 return splitLinesAsList(content, false); 848 } 849 850 /** 851 * Split string in lines. For the the empty string and <code>null</code> an 852 * empty list is returned. 853 * 854 * @see #splitLines(String) 855 */ 856 public static List<String> splitLinesAsList(String content, boolean includeTrailingEmptyLine) { 857 List<String> result = new ArrayList<>(); 858 LineSplitter lineSplitter = new LineSplitter(content); 859 lineSplitter.setIncludeTrailingEmptyLine(includeTrailingEmptyLine); 860 for (String line : lineSplitter) { 861 result.add(line); 862 } 863 return result; 864 } 865 866 /** 867 * Prefixes a string with a prefix and separator if the prefix is not empty. 868 */ 869 public static String addPrefix(String string, String separator, String prefix) { 870 if (StringUtils.isEmpty(prefix)) { 871 return string; 872 } 873 return prefix + separator + string; 874 } 875 876 /** 877 * Suffixes a string with a suffix and separator if the suffix is not empty. 878 */ 879 public static String addSuffix(String string, String separator, String suffix) { 880 if (StringUtils.isEmpty(suffix)) { 881 return string; 882 } 883 return string + separator + suffix; 884 } 885 886 /** 887 * Remove prefix from a string. 888 * 889 * @param string 890 * the string 891 * @param prefix 892 * the prefix 893 * 894 * @return the string without the prefix or the original string if it does not 895 * start with the prefix. 896 */ 897 public static String stripPrefix(String string, String prefix) { 898 if (string.startsWith(prefix)) { 899 return string.substring(prefix.length()); 900 } 901 return string; 902 } 903 904 /** 905 * Remove prefix from a string. This ignores casing, i.e.<code> 906 * stripPrefixIgnoreCase("C:/Programs/", "c:/programs/notepad.exe")</code> will 907 * return <code>"notepad.exe"</code>. 
908 * 909 * @param string 910 * the string 911 * @param prefix 912 * the prefix 913 * 914 * @return the string without the prefix or the original string if it does not 915 * start with the prefix. 916 */ 917 public static String stripPrefixIgnoreCase(String string, String prefix) { 918 if (startsWithIgnoreCase(string, prefix)) { 919 return string.substring(prefix.length()); 920 } 921 return string; 922 } 923 924 /** 925 * Remove suffix from a string. 926 * 927 * @param string 928 * the string 929 * @param suffix 930 * the suffix 931 * 932 * @return the string without the suffix or the original string if it does not 933 * end with the suffix. 934 */ 935 public static String stripSuffix(String string, String suffix) { 936 if (string.endsWith(suffix)) { 937 return string.substring(0, string.length() - suffix.length()); 938 } 939 return string; 940 } 941 942 /** Strips all digits from the given String. */ 943 public static String stripDigits(String string) { 944 return string.replaceAll("[0-9]", EMPTY_STRING); 945 } 946 947 /** Strips all trailing digits from the end of the given String. */ 948 public static String stripTrailingDigits(String string) { 949 return string.replaceAll("\\d+$", EMPTY_STRING); 950 } 951 952 /** 953 * Create string representation of a map. 954 */ 955 public static String toString(Map<?, ?> map) { 956 return toString(map, EMPTY_STRING); 957 } 958 959 /** 960 * Create string representation of a map. 
961 * 962 * @param map 963 * the map 964 * @param indent 965 * a line indent 966 */ 967 public static String toString(Map<?, ?> map, String indent) { 968 StringBuilder result = new StringBuilder(); 969 Iterator<?> keyIterator = map.keySet().iterator(); 970 971 while (keyIterator.hasNext()) { 972 result.append(indent); 973 Object key = keyIterator.next(); 974 result.append(key); 975 result.append(" = "); 976 result.append(map.get(key)); 977 if (keyIterator.hasNext()) { 978 result.append(LINE_SEPARATOR); 979 } 980 } 981 982 return result.toString(); 983 } 984 985 /** 986 * Convert stack trace of a {@link Throwable} to a string. 987 */ 988 public static String obtainStackTrace(Throwable throwable) { 989 StringWriter result = new StringWriter(); 990 PrintWriter printWriter = new PrintWriter(result); 991 throwable.printStackTrace(printWriter); 992 FileSystemUtils.close(printWriter); 993 FileSystemUtils.close(result); 994 return result.toString(); 995 } 996 997 /** 998 * Test if a string starts with one of the provided prefixes. Returns 999 * <code>false</code> if the list of prefixes is empty. This should only be used 1000 * for short lists of prefixes. 1001 */ 1002 public static boolean startsWithOneOf(String string, String... prefixes) { 1003 for (String prefix : prefixes) { 1004 if (string.startsWith(prefix)) { 1005 return true; 1006 } 1007 } 1008 return false; 1009 } 1010 1011 /** 1012 * Test if a string starts with one of the provided prefixes. Returns 1013 * <code>false</code> if the list of prefixes is empty. This should only be used 1014 * for short lists of prefixes. The given list must not be null. 1015 */ 1016 public static boolean startsWithOneOf(String string, Iterable<String> prefixes) { 1017 for (String prefix : prefixes) { 1018 if (string.startsWith(prefix)) { 1019 return true; 1020 } 1021 } 1022 return false; 1023 } 1024 1025 /** 1026 * Returns whether the given string starts with the prefix ignoring case, i.e. 
1027 * <code>startsWithIgnoreCase("foobar", "Foo")</code> will return true. 1028 */ 1029 public static boolean startsWithIgnoreCase(String string, String prefix) { 1030 return string.toLowerCase().startsWith(prefix.toLowerCase()); 1031 } 1032 1033 /** 1034 * Test if a string contains of the provided strings. Returns <code>false</code> 1035 * if the list of strings is empty. This should only be used for short lists of 1036 * strings. 1037 */ 1038 public static boolean containsOneOf(String text, String... strings) { 1039 return containsOneOf(text, Arrays.asList(strings)); 1040 } 1041 1042 /** 1043 * Test if a string contains of the provided strings. Returns <code>false</code> 1044 * if the list of strings is empty. This should only be used for short lists of 1045 * strings. 1046 */ 1047 public static boolean containsOneOf(String text, Iterable<String> strings) { 1048 for (String substring : strings) { 1049 if (text.contains(substring)) { 1050 return true; 1051 } 1052 } 1053 return false; 1054 } 1055 1056 /** 1057 * Returns whether the given String ends with the specified suffix <b>ignoring 1058 * case</b>. 1059 */ 1060 public static boolean endsWithIgnoreCase(String string, String suffix) { 1061 return string.toLowerCase().endsWith(suffix.toLowerCase()); 1062 } 1063 1064 /** 1065 * Test if a string ends with one of the provided suffixes. Returns 1066 * <code>false</code> if the list of prefixes is empty. This should only be used 1067 * for short lists of suffixes. 1068 */ 1069 public static boolean endsWithOneOf(String string, String... suffixes) { 1070 for (String suffix : suffixes) { 1071 if (string.endsWith(suffix)) { 1072 return true; 1073 } 1074 } 1075 return false; 1076 } 1077 1078 /** 1079 * Prefix all lines of a string. This also replaces line breaks with the 1080 * platform-specific line-separator. 
1081 * 1082 * @param string 1083 * the string to prefix 1084 * @param prefix 1085 * the prefix to add 1086 * @param prefixFirstLine 1087 * a flag that indicates if the first line should be prefixed or not. 1088 */ 1089 public static String prefixLines(String string, String prefix, boolean prefixFirstLine) { 1090 String[] lines = StringUtils.splitLines(string.trim()); 1091 StringBuilder result = new StringBuilder(); 1092 for (int i = 0; i < lines.length; i++) { 1093 if (i > 0 || prefixFirstLine) { 1094 result.append(prefix); 1095 } 1096 result.append(lines[i]); 1097 if (i < lines.length - 1) { 1098 result.append(LINE_SEPARATOR); 1099 } 1100 } 1101 return result.toString(); 1102 } 1103 1104 /** 1105 * Splits the given string into an array of {@link Character}s. This is mostly 1106 * used for testing purposes, if an array of certain objects is needed. 1107 */ 1108 public static Character[] splitChars(String s) { 1109 Character[] result = new Character[s.length()]; 1110 for (int i = 0; i < result.length; ++i) { 1111 result[i] = s.charAt(i); 1112 } 1113 return result; 1114 } 1115 1116 /** Capitalize string. */ 1117 public static String capitalize(String string) { 1118 if (StringUtils.isEmpty(string)) { 1119 return string; 1120 } 1121 return string.substring(0, 1).toUpperCase() + string.substring(1); 1122 } 1123 1124 /** Decapitalize string. */ 1125 public static String decapitalize(String string) { 1126 if (StringUtils.isEmpty(string)) { 1127 return string; 1128 } 1129 return string.substring(0, 1).toLowerCase() + string.substring(1); 1130 } 1131 1132 /** 1133 * This method splits the input string into words (delimited by whitespace) and 1134 * returns a string whose words are separated by single spaces and whose lines 1135 * are not longer than the given length (unless a very long word occurs)). 
1136 */ 1137 public static String wrapLongLines(String s, int maxLineLength) { 1138 String[] words = s.split("\\s+"); 1139 1140 StringBuilder sb = new StringBuilder(); 1141 int lineLength = 0; 1142 for (String word : words) { 1143 if (word.length() == 0) { 1144 continue; 1145 } 1146 1147 if (lineLength > 0) { 1148 if (lineLength + 1 + word.length() > maxLineLength) { 1149 sb.append(LINE_SEPARATOR); 1150 lineLength = 0; 1151 } else { 1152 sb.append(SPACE); 1153 lineLength += 1; 1154 } 1155 } 1156 sb.append(word); 1157 lineLength += word.length(); 1158 } 1159 1160 return sb.toString(); 1161 } 1162 1163 /** Returns the longest common prefix of s and t */ 1164 public static String longestCommonPrefix(String s, String t) { 1165 int n = Math.min(s.length(), t.length()); 1166 for (int i = 0; i < n; i++) { 1167 if (s.charAt(i) != t.charAt(i)) { 1168 return s.substring(0, i); 1169 } 1170 } 1171 return s.substring(0, n); 1172 } 1173 1174 /** Returns the longest common suffix of s and t */ 1175 public static String longestCommonSuffix(String s, String t) { 1176 return reverse(StringUtils.longestCommonPrefix(reverse(s), reverse(t))); 1177 } 1178 1179 /** Reverse a string */ 1180 public static String reverse(String s) { 1181 return new StringBuilder(s).reverse().toString(); 1182 } 1183 1184 /** 1185 * Returns the longest common prefix of the strings in the list or the empty 1186 * string if no common prefix exists. 
1187 */ 1188 public static String longestCommonPrefix(Iterable<String> strings) { 1189 Iterator<String> iterator = strings.iterator(); 1190 CCSMAssert.isTrue(iterator.hasNext(), "Expected are at least 2 strings"); 1191 String commonPrefix = iterator.next(); 1192 CCSMAssert.isTrue(iterator.hasNext(), "Expected are at least 2 strings"); 1193 1194 while (iterator.hasNext()) { 1195 commonPrefix = longestCommonPrefix(commonPrefix, iterator.next()); 1196 if (commonPrefix.length() == 0) { 1197 break; 1198 } 1199 } 1200 1201 return commonPrefix; 1202 } 1203 1204 /** Removes whitespace from a string. */ 1205 public static String removeWhitespace(String content) { 1206 StringBuilder result = new StringBuilder(); 1207 for (int i = 0; i < content.length(); i++) { 1208 char c = content.charAt(i); 1209 if (!Character.isWhitespace(c)) { 1210 result.append(c); 1211 } 1212 } 1213 return result.toString(); 1214 } 1215 1216 /** 1217 * Removes all whitespace at the beginning of each line in the given string. 1218 */ 1219 public static String removeWhitespaceAtBeginningOfLine(String content) { 1220 return LEADING_WHITESPACE_PATTERN.matcher(content).replaceAll(StringUtils.EMPTY_STRING); 1221 } 1222 1223 /** 1224 * Creates a unique name which is not contained in the given set of names. If 1225 * possible the given base name is directly returned, otherwise it is extended 1226 * by a number. 1227 */ 1228 public static String createUniqueName(String baseName, Set<String> usedNames) { 1229 String name = baseName; 1230 int i = 1; 1231 while (usedNames.contains(name)) { 1232 name = baseName + ++i; 1233 } 1234 return name; 1235 } 1236 1237 /** 1238 * Transforms a string from camel-case to lower-case with hyphens (aka kebab 1239 * case). 1240 */ 1241 public static String camelCaseToKebabCase(String s) { 1242 return stripPrefix(s.replaceAll("([A-Z][a-z])", "-$1").toLowerCase(), "-"); 1243 } 1244 1245 /** 1246 * Converts a dash-separated name (aka kebab case) to a camel-cased one. 
1247 */ 1248 public static String kebabCaseToCamelCase(String name) { 1249 StringBuilder builder = new StringBuilder(); 1250 for (String part : name.split("-")) { 1251 if (part.isEmpty()) { 1252 continue; 1253 } 1254 if (builder.length() == 0) { 1255 builder.append(part); 1256 } else { 1257 builder.append(part.substring(0, 1).toUpperCase()); 1258 builder.append(part.substring(1)); 1259 } 1260 } 1261 return builder.toString(); 1262 } 1263 1264 /** 1265 * Transforms a string from camel-case to upper-case with underscores. 1266 */ 1267 public static String camelCaseToUnderscored(String s) { 1268 return stripPrefix(s.replaceAll("([A-Z][a-z])", "_$1").toUpperCase(), "_"); 1269 } 1270 1271 /** 1272 * Encodes a byte array as a hex string following the method described here: 1273 * http ://stackoverflow.com/questions/9655181/convert-from-byte-array-to-hex- 1274 * string-in-java 1275 */ 1276 public static String encodeAsHex(byte[] data) { 1277 char[] hexChars = new char[data.length * 2]; 1278 for (int j = 0; j < data.length; j++) { 1279 int v = data[j] & 0xFF; 1280 hexChars[j * 2] = HEX_CHARACTERS[v >>> 4]; 1281 hexChars[j * 2 + 1] = HEX_CHARACTERS[v & 0x0F]; 1282 } 1283 return new String(hexChars); 1284 } 1285 1286 /** Decodes a byte array from a hex string. */ 1287 public static byte[] decodeFromHex(String s) { 1288 byte[] result = new byte[s.length() / 2]; 1289 for (int i = 0; i < result.length; ++i) { 1290 result[i] = (byte) Integer.parseInt(s.substring(2 * i, 2 * i + 2), 16); 1291 } 1292 return result; 1293 } 1294 1295 /** 1296 * Format number with number formatter, if number formatter is 1297 * <code>null</code>, this uses {@link String#valueOf(double)}. 
1298 */ 1299 public static String format(double number, NumberFormat numberFormat) { 1300 if (numberFormat == null) { 1301 return String.valueOf(number); 1302 } 1303 return numberFormat.format(number); 1304 } 1305 1306 /** 1307 * Regex replacement methods like 1308 * {@link Matcher#appendReplacement(StringBuffer, String)} or 1309 * {@link String#replaceAll(String, String)} treat dollar signs as group 1310 * references. This method escapes replacement strings so that dollar signs are 1311 * treated as literals. 1312 */ 1313 public static String escapeRegexReplacementString(String replacement) { 1314 // this needs to be escape thrice as replaceAll also recognizes the 1315 // dollar sign 1316 return replacement.replaceAll("([$\\\\])", "\\\\$1"); 1317 } 1318 1319 /** 1320 * Converts a string to a (UTF-8) byte representation. This returns null on a 1321 * null input. 1322 */ 1323 public static byte[] stringToBytes(String s) { 1324 if (s == null) { 1325 return null; 1326 } 1327 return s.getBytes(StandardCharsets.UTF_8); 1328 } 1329 1330 /** 1331 * Converts a (UTF-8) byte array to a string. This returns null on a null input. 1332 */ 1333 public static String bytesToString(byte[] b) { 1334 if (b == null) { 1335 return null; 1336 } 1337 return new String(b, StandardCharsets.UTF_8); 1338 } 1339 1340 /** 1341 * Converts a byte array to a string, assuming the given encoding, 1342 * <strong>unless</strong> a byte-order mark included with the bytes implies 1343 * that another encoding is actually used. 1344 * <p> 1345 * This method returns null on a null input. 
1346 */ 1347 public static String bytesToString(byte[] b, Charset encoding) { 1348 if (b == null) { 1349 return null; 1350 } 1351 Optional<EByteOrderMark> bom = EByteOrderMark.determineBOM(b); 1352 Charset detectedEncoding = bom.map(EByteOrderMark::getEncoding).orElse(encoding); 1353 int bytesToSkip = bom.map(EByteOrderMark::getBOMLength).orElse(0); 1354 1355 return new String(b, bytesToSkip, b.length - bytesToSkip, detectedEncoding); 1356 } 1357 1358 /** 1359 * Returns a list containing the string representations of the given collection 1360 * of objects. {@link String#valueOf} is used to convert each object. 1361 * <em>null</em> values are included, i.e., the resulting list is guaranteed to 1362 * have the size of the initial collection. 1363 */ 1364 public static List<String> asStringList(Collection<?> objects) { 1365 List<String> result = new ArrayList<>(); 1366 for (Object o : objects) { 1367 result.add(String.valueOf(o)); 1368 } 1369 return result; 1370 } 1371 1372 /** 1373 * Filters the given collection of strings by the given suffix, i.e. the 1374 * resulting list contains only those strings that end with this suffix. 1375 */ 1376 public static List<String> filterBySuffix(String suffix, Collection<String> strings) { 1377 List<String> result = new ArrayList<>(); 1378 for (String s : strings) { 1379 if (s.endsWith(suffix)) { 1380 result.add(s); 1381 } 1382 } 1383 return result; 1384 } 1385 1386 /** 1387 * Converts the given objects into a string list by invoking 1388 * {@link Object#toString()} on each non-null element. For null entries in the 1389 * input, the output will contain a null entry as well. 
1390 */ 1391 public static <T> List<String> toStrings(Collection<T> objects) { 1392 List<String> strings = new ArrayList<>(); 1393 for (T t : objects) { 1394 if (t == null) { 1395 strings.add(null); 1396 } else { 1397 strings.add(t.toString()); 1398 } 1399 } 1400 return strings; 1401 } 1402 1403 /** 1404 * Converts the given Object array into a String array by invoking toString on 1405 * each non-null element. For null entries in the input array, the output will 1406 * contain a null entry as well 1407 */ 1408 public static String[] toStringArray(Object[] array) { 1409 return CollectionUtils.toArray(toStrings(Arrays.asList(array)), String.class); 1410 } 1411 1412 /** 1413 * Converts the given String to an {@link InputStream} with UTF-8 encoding. 1414 */ 1415 public static InputStream toInputStream(String string) { 1416 return new ByteArrayInputStream(string.getBytes(StandardCharsets.UTF_8)); 1417 } 1418 1419 /** 1420 * Converts the given {@link InputStream} to a String with UTF-8 encoding. 1421 */ 1422 public static String fromInputStream(InputStream inputStream) throws IOException { 1423 ByteArrayOutputStream result = new ByteArrayOutputStream(); 1424 byte[] buffer = new byte[1024]; 1425 int length = inputStream.read(buffer); 1426 while (length != -1) { 1427 result.write(buffer, 0, length); 1428 length = inputStream.read(buffer); 1429 } 1430 return result.toString(StandardCharsets.UTF_8.name()); 1431 } 1432 1433 /** 1434 * Truncates the given string (if necessary) by removing characters from the end 1435 * and attaching the suffix such that the resulting string has at most length 1436 * characters. 
length must be >= suffix.length(); 1437 */ 1438 public static String truncate(String string, int length, String suffix) { 1439 CCSMAssert.isTrue(length >= suffix.length(), "Expected length >= suffix.length()"); 1440 if (string.length() <= length) { 1441 return string; 1442 } 1443 return string.substring(0, length - suffix.length()) + suffix; 1444 } 1445 1446 /** 1447 * Calculates the edit distance (aka Levenshtein distance) for two strings, i.e. 1448 * the number of insert, delete or replace operations required to transform one 1449 * string into the other. The running time is O(n*m) and the space complexity is 1450 * O(n+m), where n/m are the lengths of the strings. Note that due to the high 1451 * running time, for long strings the {@link Diff} class should be used, that 1452 * has a more efficient algorithm, but only for insert/delete (not replace 1453 * operation). 1454 * 1455 * Although this is a clean reimplementation, the basic algorithm is explained 1456 * here: http://en.wikipedia.org/wiki/Levenshtein_distance# 1457 * Iterative_with_two_matrix_rows 1458 */ 1459 public static int editDistance(String s, String t) { 1460 char[] sChars = s.toCharArray(); 1461 char[] tChars = t.toCharArray(); 1462 int m = s.length(); 1463 int n = t.length(); 1464 1465 int[] distance = new int[m + 1]; 1466 for (int i = 0; i <= m; ++i) { 1467 distance[i] = i; 1468 } 1469 1470 int[] oldDistance = new int[m + 1]; 1471 for (int j = 1; j <= n; ++j) { 1472 1473 // swap distance and oldDistance 1474 int[] tmp = oldDistance; 1475 oldDistance = distance; 1476 distance = tmp; 1477 1478 distance[0] = j; 1479 for (int i = 1; i <= m; ++i) { 1480 int cost = 1 + Math.min(distance[i - 1], oldDistance[i]); 1481 if (sChars[i - 1] == tChars[j - 1]) { 1482 cost = Math.min(cost, oldDistance[i - 1]); 1483 } else { 1484 cost = Math.min(cost, 1 + oldDistance[i - 1]); 1485 } 1486 distance[i] = cost; 1487 } 1488 } 1489 1490 return distance[m]; 1491 } 1492 1493 /** 1494 * Returns whether the edit distance 
as calculated by 1495 * {@link #editDistance(String, String)}, is 0 or 1. This implementation is 1496 * significantly more efficient compared to actually calculating the edit 1497 * distance and runs in O(n+m). 1498 * 1499 * The idea is that with at most one change, the start and end of both strings 1500 * must be the same, to traverse from start and end to the first difference. If 1501 * the distance between both pointers is at most one, the edit distance is at 1502 * most one as well. 1503 */ 1504 public static boolean isEditDistanceAtMost1(String s, String t) { 1505 int m = s.length(); 1506 int n = t.length(); 1507 1508 if (Math.abs(n - m) > 1) { 1509 return false; 1510 } 1511 1512 // advance to first characters that differ 1513 int sStart = 0; 1514 int tStart = 0; 1515 while (sStart < m && tStart < n && s.charAt(sStart) == t.charAt(tStart)) { 1516 sStart += 1; 1517 tStart += 1; 1518 } 1519 1520 // reverse advance to first characters that differ 1521 int sEnd = m - 1; 1522 int tEnd = n - 1; 1523 while (sEnd >= sStart && tEnd >= tStart && s.charAt(sEnd) == t.charAt(tEnd)) { 1524 sEnd -= 1; 1525 tEnd -= 1; 1526 } 1527 1528 // as both are exclusive indexes (i.e. we have a difference at the 1529 // index), the indexes must be the same or even overlap to have an edit 1530 // distance of 1 or less. 1531 return sEnd <= sStart && tEnd <= tStart; 1532 } 1533 1534 /** 1535 * Returns a list that contains all entries of the original list as lowercase 1536 * strings. Does not operate in-place! 1537 */ 1538 public static List<String> lowercaseList(Collection<String> strings) { 1539 List<String> lowercaseList = new ArrayList<>(); 1540 for (String string : strings) { 1541 lowercaseList.add(string.toLowerCase()); 1542 } 1543 return lowercaseList; 1544 } 1545 1546 /** 1547 * Returns the input string. Returns the provided default value in case the 1548 * input is null. 
1549 */ 1550 public static String defaultIfNull(String input, String defaultValue) { 1551 if (input == null) { 1552 return defaultValue; 1553 } 1554 return input; 1555 } 1556 1557 /** 1558 * Returns the input string. Returns the provided default value in case the 1559 * input is null or the empty string. 1560 */ 1561 public static String defaultIfNullOrEmpty(String input, String defaultValue) { 1562 if (isEmpty(input)) { 1563 return defaultValue; 1564 } 1565 return input; 1566 } 1567 1568 /** 1569 * Returns the input string. Returns {@link #EMPTY_STRING} in case the input is 1570 * null. 1571 */ 1572 public static String emptyIfNull(String input) { 1573 return defaultIfNull(input, EMPTY_STRING); 1574 } 1575 1576 /** 1577 * Splits a string at every top-level occurrence of the separator character. 1578 * This can be useful e.g. for splitting type parameter lists. 1579 * <code>"String,Map<String,Integer>,Map<String,Map<String,Integer>>"</code> 1580 * split at ',' with levelStart = '<' and levelEnd = '>' would result in 1581 * <code>["String","Map<String,Integer>","Map<String,Map<String,Integer>>"]</code> 1582 * 1583 * If there is no separator char, use the levelStart char. In this case, 1584 * separator chars are included in the output. 1585 * <code>splitTopLevel("((a))(b)", '(', '(', ')' ) = ["", "((a))", "(b)"]</code> 1586 * 1587 * Can also be used to split columns from a CSV line where values can be quoted 1588 * <code>splitTopLevel("\"a\";\";\";c", ';', '"', '"' ) = ["\"a\"", "\";\"", "c"]</code> 1589 * 1590 * @param input 1591 * The input string. 1592 * @param separator 1593 * The separator character. 1594 * @param levelStart 1595 * The character that starts a new level. 1596 * @param levelEnd 1597 * The character that ends a level. 1598 * @return The input string split at every top-level separator. 1599 * @throws AssertionError 1600 * If the numbers for opening and closing characters in the input 1601 * string differ. 
1602 */ 1603 public static List<String> splitTopLevel(String input, char separator, char levelStart, char levelEnd) { 1604 int currentLevel = 0; 1605 int currentStartIndex = 0; 1606 List<String> result = new ArrayList<>(); 1607 1608 for (int i = 0; i < input.length(); i++) { 1609 char currentChar = input.charAt(i); 1610 if (currentChar == separator && currentLevel == 0) { 1611 result.add(input.substring(currentStartIndex, i)); 1612 if (separator == levelStart) { 1613 currentStartIndex = i; 1614 } else { 1615 currentStartIndex = i + 1; 1616 } 1617 } 1618 if (currentChar == levelEnd && currentLevel > 0) { 1619 currentLevel--; 1620 } else if (currentChar == levelStart) { 1621 currentLevel++; 1622 } 1623 } 1624 1625 CCSMAssert.isTrue(currentLevel == 0, "String is imbalanced: " + input); 1626 1627 result.add(input.substring(currentStartIndex)); 1628 return result; 1629 } 1630 1631 /** 1632 * Ensure that the given string ends with the given suffix, i.e. if it does not 1633 * have the given suffix, the returned string is <code>s + suffix</code>. 1634 */ 1635 public static String ensureEndsWith(String s, String suffix) { 1636 if (!s.endsWith(suffix)) { 1637 return s + suffix; 1638 } 1639 return s; 1640 } 1641 1642 /** 1643 * Ensure that the given string starts with the given prefix, i.e. if it does 1644 * not have the given prefix, it is prepended to the string. 1645 */ 1646 public static String ensureStartsWith(String s, String prefix) { 1647 if (!s.startsWith(prefix)) { 1648 return prefix + s; 1649 } 1650 return s; 1651 } 1652 1653 /** 1654 * Concatenates the list of string with delimiter and add escape character if 1655 * needed. 
For example following list { "asd,rtz", "rrr", "rrr" } with delimiter 1656 * as comma(,) will produce the following comma(,) delimited sting 1657 * "asd\\,rtz,rrr,rrr" 1658 */ 1659 public static String concatWithEscapeCharacter(List<String> data, String delimiter) { 1660 return data.stream().map(a -> a.replace(delimiter, "\\" + delimiter)).reduce((a, b) -> a + delimiter + b) 1661 .orElse(EMPTY_STRING); 1662 } 1663 1664 /** 1665 * Splits the delimited string with considering escaped delimiters. For example 1666 * following comma(,) delimited string "asd\\,rtz,rrr,rrr" will produce the list 1667 * { "asd,rtz", "rrr", "rrr" } 1668 */ 1669 public static List<String> splitWithEscapeCharacter(String data, String delimiter) { 1670 if (isEmpty(data) || isEmpty(delimiter)) { 1671 return Collections.emptyList(); 1672 } 1673 String regex = "(?<!\\\\)" + delimiter + "\\s*"; 1674 return CollectionUtils.map(Arrays.asList(data.split(regex)), 1675 (part) -> part.trim().replace("\\" + delimiter, delimiter)); 1676 } 1677 1678 /** 1679 * Returns the result of applying all pattern replacements in order, each as 1680 * often as possible (globally). 1681 */ 1682 public static String applyAllReplacements(String s, PairList<Pattern, String> replacements) { 1683 for (int i = 0; i < replacements.size(); ++i) { 1684 s = applyReplacement(s, replacements.getFirst(i), replacements.getSecond(i)); 1685 } 1686 return s; 1687 } 1688 1689 /** 1690 * Returns the replacement as often as possible. This is the equivalent of 1691 * {@link String#replaceAll(String, String)}, but accepting a {@link Pattern} 1692 * instead of a regex string. 
1693 */ 1694 public static String applyReplacement(String s, Pattern pattern, String replacement) { 1695 StringBuffer buffer = new StringBuffer(); 1696 Matcher matcher = pattern.matcher(s); 1697 while (matcher.find()) { 1698 matcher.appendReplacement(buffer, replacement); 1699 } 1700 matcher.appendTail(buffer); 1701 return buffer.toString(); 1702 } 1703 1704 /** 1705 * Returns {@code null} if the input string is empty (after trimming) or 1706 * {@code null}. Otherwise, the input is returned unaltered. 1707 */ 1708 public static String nullIfEmpty(String input) { 1709 if (isEmpty(input)) { 1710 return null; 1711 } 1712 return input; 1713 } 1714 1715 /** 1716 * Checks whether the parameter contains only number literals and (optionally) 1717 * starts with a '-' char. Returns false if the string is null or empty. 1718 */ 1719 public static boolean isInteger(String string) { 1720 if (string == null || string.isEmpty()) { 1721 return false; 1722 } 1723 if (string.startsWith("-") && string.length() > 1) { 1724 string = string.substring(1); 1725 } 1726 for (char c : string.toCharArray()) { 1727 if (c < '0' || c > '9') { 1728 return false; 1729 } 1730 } 1731 return true; 1732 } 1733 1734 /** 1735 * Returns the index of the first character in the given string that matches the 1736 * pattern. The pattern is applied to single characters, so it makes no sense to 1737 * supply patterns that would match on longer character sequences. 1738 */ 1739 public static int indexOfMatch(String string, Pattern pattern) { 1740 for (int i = 0; i < string.length(); i++) { 1741 char c = string.charAt(i); 1742 if (pattern.matcher(String.valueOf(c)).matches()) { 1743 return i; 1744 } 1745 } 1746 return -1; // no match at all 1747 } 1748 1749 /** 1750 * Escapes the given chars in the content. Prepends a "\" before each occurrence 1751 * of the chars. Special whitespace chars ('\t','\b','\n','\r', and '\f') are 1752 * replaced by "\t",... . Existing occurrences of "\t",... 
are prepended with a 1753 * "\". 1754 */ 1755 public static String escapeChars(String content, List<Character> chars) { 1756 // replace "\\t" in "foo\\tbar" to "foo\\\\tbar" 1757 Map<String, String> whitespaceEscapeMap = new HashMap<>(); 1758 for (Character whitespaceChar : CollectionUtils.filter(chars, StringUtils::isJavaWhitespaceEscapeCharacter)) { 1759 String escapeSequence = StringUtils.getEscapeSequence(whitespaceChar); 1760 whitespaceEscapeMap.put(escapeSequence, "\\" + escapeSequence); 1761 } 1762 content = replaceFromMap(content, whitespaceEscapeMap); 1763 // replace "foo\tbar" to foo\\tbar" 1764 Map<String, String> escapeMap = chars.stream() 1765 .collect(Collectors.toMap(String::valueOf, StringUtils::getEscapeSequence)); 1766 return replaceFromMap(content, escapeMap); 1767 } 1768 1769 /** 1770 * Returns whether the given character is transformed to a whitespace character 1771 * by Java (e.g., \n). 1772 */ 1773 private static boolean isJavaWhitespaceEscapeCharacter(Character character) { 1774 return character == '\t' || character == '\b' || character == '\n' || character == '\r' || character == '\f'; 1775 } 1776 1777 /** 1778 * Returns the Java escape sequence for the given character. 1779 */ 1780 private static String getEscapeSequence(Character character) { 1781 switch (character) { 1782 case '\t': 1783 return "\\t"; 1784 case '\b': 1785 return "\\b"; 1786 case '\n': 1787 return "\\n"; 1788 case '\r': 1789 return "\\r"; 1790 case '\f': 1791 return "\\f"; 1792 default: 1793 // no Java whitespace transformation. 1794 return "\\" + String.valueOf(character); 1795 } 1796 } 1797 1798 /** 1799 * Un-escapes the given chars in the content. Replaces each occurrence of \a 1800 * with "a" (if "a" is in chars). Whitespace escape sequences (\t, \b, \n, \r, 1801 * and \f) are replaced by their actual values ('\t',...) . Existing occurrences 1802 * of \\t,... are replaced with \t. 
1803 */ 1804 public static String unEscapeChars(String content, List<Character> chars) { 1805 Map<String, String> escapeMap = chars.stream() 1806 .collect(Collectors.toMap(StringUtils::getEscapeSequence, String::valueOf)); 1807 // replace "\\x" in "foo\\xbar" to "fooxbar" 1808 content = replaceFromMap(content, escapeMap); 1809 // We might have replaced e.g., \\n with a real line break. We need to revert 1810 // this error in the next step. 1811 Map<String, String> whitespaceEscapeMap = new HashMap<>(); 1812 for (Character whitespaceChar : CollectionUtils.filter(chars, StringUtils::isJavaWhitespaceEscapeCharacter)) { 1813 whitespaceEscapeMap.put("\\" + whitespaceChar, getEscapeSequence(whitespaceChar)); 1814 } 1815 return replaceFromMap(content, whitespaceEscapeMap); 1816 } 1817 1818 /** 1819 * Returns the beginning of the given String, retaining at most numberOfChars 1820 * characters. In case the String is short or equals to numberOfChars, the 1821 * supplied String is returned unchanged. Otherwise the String is truncated to 1822 * numberOfChars characters and suffixed with ... 1823 */ 1824 public static String getBeginning(String s, int numberOfChars) { 1825 if (s.length() <= numberOfChars) { 1826 return s; 1827 } 1828 return s.substring(0, numberOfChars) + "..."; 1829 } 1830 1831 /** 1832 * Converts the given String to a String where the first character is in upper 1833 * case and all other characters are in lower case 1834 */ 1835 public static String toFirstUpper(String s) { 1836 if (isEmpty(s)) { 1837 return s; 1838 } 1839 char first = s.charAt(0); 1840 return Character.toUpperCase(first) + s.substring(1).toLowerCase(); 1841 } 1842 1843 /** 1844 * Returns the string itself, if count is 1. Otherwise returns the string with 1845 * appended "s". 1846 */ 1847 public static String pluralize(String string, int count) { 1848 if (count == 1) { 1849 return string; 1850 } 1851 return string + "s"; 1852 } 1853 1854 /** Adds a prefix and a suffix to the given string. 
*/ 1855 public static String surroundWith(String s, String prefix, String suffix) { 1856 return prefix + s + suffix; 1857 } 1858 1859 /** 1860 * Compares the given value to all given strings. 1861 * 1862 * This loops through the supplied string array. If the array is larger or you 1863 * already have the Strings in a Collection, use 1864 * <code>Collection.contains(..)</code>. Consider putting the arguments into a 1865 * Collection constant. 1866 * 1867 * @return <true> if one string equals the value. 1868 */ 1869 public static boolean equalsOneOf(String value, String... strings) { 1870 for (String compareValue : strings) { 1871 if (value.equals(compareValue)) { 1872 return true; 1873 } 1874 } 1875 return false; 1876 } 1877 1878 /** 1879 * Removes double quotes from beginning and end (if present) and returns the new 1880 * string. 1881 */ 1882 public static String removeDoubleQuotes(String string) { 1883 return stripPrefix(stripSuffix(string, "\""), "\""); 1884 } 1885 1886 /** 1887 * Removes single quotes from beginning and end (if present) and returns the new 1888 * string. 1889 */ 1890 public static String removeSingleQuotes(String string) { 1891 return stripPrefix(stripSuffix(string, "'"), "'"); 1892 } 1893 1894 /** 1895 * Repeats a {@link String} 1896 * 1897 * @param s 1898 * the {@link String} to repeat 1899 * @param times 1900 * number of times the string gets repeated 1901 * @return the repeated {@link String} 1902 */ 1903 public static String repeat(String s, int times) { 1904 return new String(new char[times]).replace("\0", s); 1905 } 1906 1907 /** 1908 * {@link #toString()} with null check. 1909 * 1910 * @param value 1911 * object to stringify 1912 * @return string representation or {@link StringUtils#EMPTY_STRING} if value is 1913 * null. 
*/
    public static String safeToString(Object value) {
        if (value == null) {
            return StringUtils.EMPTY_STRING;
        }
        return value.toString();
    }

    /**
     * Returns a truncated string that contains only the first x lines of the given
     * text. Lines are delimited by "\n"; the line break that ends the last
     * retained line is part of the result.
     *
     * @param text
     *            the text to truncate, must not be null
     * @param numberOfLines
     *            the number of lines to retain
     * @return the first numberOfLines lines of the text; the unchanged text if it
     *         has no more than numberOfLines lines; the empty string if the text
     *         is empty or numberOfLines is less than 1
     */
    public static String retainHeadLines(String text, int numberOfLines) {
        if (text.isEmpty() || numberOfLines < 1) {
            return "";
        }
        int charsBeforeCutLine = 0;
        for (int i = 0; i < numberOfLines; i++) {
            int lineBreak = text.indexOf("\n", charsBeforeCutLine);
            if (lineBreak < 0) {
                // fewer line breaks than requested lines: everything is retained
                return text;
            }
            charsBeforeCutLine = lineBreak + 1;
        }
        return text.substring(0, charsBeforeCutLine);
    }
}