001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 the ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package eu.cqse.check.framework.shallowparser; 018 019import java.util.ArrayList; 020import java.util.Arrays; 021import java.util.Collection; 022import java.util.EnumSet; 023import java.util.List; 024import java.util.Set; 025import java.util.function.BiPredicate; 026import java.util.function.Predicate; 027 028import org.conqat.lib.commons.assertion.CCSMAssert; 029import org.conqat.lib.commons.collections.CollectionUtils; 030 031import com.google.common.base.Preconditions; 032 033import eu.cqse.check.framework.scanner.ELanguage; 034import eu.cqse.check.framework.scanner.ETokenType; 035import eu.cqse.check.framework.scanner.ETokenType.ETokenClass; 036import eu.cqse.check.framework.scanner.IToken; 037 038/** 039 * Utility methods for {@link IToken} lists. 040 */ 041public class TokenStreamUtils { 042 043 /** The value returned if nothing was found in the various find methods. */ 044 public static final int NOT_FOUND = -1; 045 046 /** 047 * Finds the index of the first token in the token list that has any of the of 048 * given token types. 049 * 050 * <p> 051 * For finding sequences, see this method: 052 * {@link #firstTokenOfTypeSequence(List, int, int, ETokenType...)} 053 * 054 * @return integer index (or {@link #NOT_FOUND}). 055 */ 056 public static int firstTokenOfType(List<IToken> tokens, ETokenType... tokenTypes) { 057 return firstTokenOfType(tokens, 0, tokenTypes); 058 } 059 060 /** 061 * Finds the index of the first token in the token list after startIndex that 062 * has any of the of given token types. 063 * 064 * <p> 065 * For finding sequences, see this method: 066 * {@link #firstTokenOfTypeSequence(List, int, int, ETokenType...)} 067 * 068 * @return integer index (or {@link #NOT_FOUND}). 069 */ 070 public static int firstTokenOfType(List<IToken> tokens, int startIndex, ETokenType... tokenTypes) { 071 return firstTokenOfType(tokens, startIndex, tokens.size(), tokenTypes); 072 } 073 074 /** 075 * Finds the index of the first token in the token list (not before startIndex 076 * and before endIndex) that has any of the of given token types. 077 * <p> 078 * For finding sequences, see this method: 079 * {@link #firstTokenOfTypeSequence(List, int, int, ETokenType...)} 080 * 081 * @return integer index (or {@link #NOT_FOUND}). 082 */ 083 public static int firstTokenOfType(List<IToken> tokens, int startIndex, int endIndex, ETokenType... tokenTypes) { 084 EnumSet<ETokenType> set = toEnumSet(tokenTypes); 085 return firstTokenOfType(tokens, startIndex, endIndex, set); 086 } 087 088 /** 089 * Finds the index of the first token in the token list (not before startIndex 090 * and before endIndex) that has any of the of given token types. 091 * <p> 092 * For finding sequences, see this method: 093 * {@link #firstTokenOfTypeSequence(List, int, int, ETokenType...)} 094 * 095 * @return integer index (or {@link #NOT_FOUND}). 096 */ 097 public static int firstTokenOfType(List<IToken> tokens, int startIndex, int endIndex, EnumSet<ETokenType> set) { 098 assertListRange(tokens, startIndex, endIndex); 099 for (int i = startIndex; i < endIndex; ++i) { 100 if (set.contains(tokens.get(i).getType())) { 101 return i; 102 } 103 } 104 return NOT_FOUND; 105 } 106 107 /** 108 * Converts the given token types array into an enum set. 109 */ 110 private static EnumSet<ETokenType> toEnumSet(ETokenType... tokenTypes) { 111 EnumSet<ETokenType> set = EnumSet.noneOf(ETokenType.class); 112 set.addAll(Arrays.asList(tokenTypes)); 113 return set; 114 } 115 116 /** 117 * Returns the index of the last token that has any of the given token types (or 118 * {@link #NOT_FOUND}). 119 */ 120 public static int lastTokenOfType(List<IToken> tokens, ETokenType... tokenTypes) { 121 return lastTokenOfType(tokens, 0, tokenTypes); 122 } 123 124 /** 125 * Returns the index of the last token that has any of the given token types (or 126 * {@link #NOT_FOUND}). 127 */ 128 public static int lastTokenOfType(List<IToken> tokens, EnumSet<ETokenType> tokenTypes) { 129 return lastTokenOfType(tokens, 0, tokenTypes); 130 } 131 132 /** 133 * Returns the index of the last token that has any of the given token types not 134 * before the start index (or {@link #NOT_FOUND}). 135 */ 136 public static int lastTokenOfType(List<IToken> tokens, int startIndex, ETokenType... tokenTypes) { 137 return lastTokenOfType(tokens, startIndex, tokens.size(), tokenTypes); 138 } 139 140 /** 141 * Returns the index of the last token that has any of the given token types not 142 * before the start index (or {@link #NOT_FOUND}). 143 */ 144 public static int lastTokenOfType(List<IToken> tokens, int startIndex, EnumSet<ETokenType> tokenTypes) { 145 return lastTokenOfType(tokens, startIndex, tokens.size(), tokenTypes); 146 } 147 148 /** 149 * Returns the index of the last token that has any of the given token types not 150 * before the start index and before the end index (or {@link #NOT_FOUND}). 151 */ 152 public static int lastTokenOfType(List<IToken> tokens, int startIndex, int endIndex, ETokenType... tokenTypes) { 153 return lastTokenOfType(tokens, startIndex, endIndex, toEnumSet(tokenTypes)); 154 } 155 156 /** 157 * Returns the index of the last token that has any of the given token types not 158 * before the start index and before the end index (or {@link #NOT_FOUND}). 159 */ 160 public static int lastTokenOfType(List<IToken> tokens, int startIndex, int endIndex, 161 EnumSet<ETokenType> tokenTypes) { 162 assertListRange(tokens, startIndex, endIndex); 163 164 for (int i = endIndex - 1; i >= startIndex; --i) { 165 if (tokenTypes.contains(tokens.get(i).getType())) { 166 return i; 167 } 168 } 169 return NOT_FOUND; 170 } 171 172 /** 173 * Returns the starting index of a element - separator - element type token 174 * sequence ending at {@code endOffset} with type {@code element}. Returns 175 * {@link #NOT_FOUND} if no such sequence is found. Useful for example for 176 * matching qualified names (element=IDENTIFIER, separator=DOT) 177 */ 178 public static int firstTokenOfAlternatingTypes(List<IToken> tokens, int endOffset, ETokenType element, 179 ETokenType separator) { 180 Preconditions.checkArgument(endOffset >= 0 && endOffset < tokens.size()); 181 if (tokens.get(endOffset).getType() != element) { 182 return NOT_FOUND; 183 } 184 for (int i = endOffset; i >= 0; i -= 2) { 185 if (i >= 1 && TokenStreamUtils.hasTokenTypeSequence(tokens, i - 1, separator, element)) { 186 continue; 187 } else if (tokens.get(i).getType() == element) { 188 return i; 189 } 190 return NOT_FOUND; 191 } 192 return NOT_FOUND; 193 } 194 195 /** 196 * Asserts that the given start and end indices lie in between the list range 197 * and that the end index is not smaller than the start index. 198 */ 199 private static void assertListRange(List<IToken> tokens, int startIndex, int endIndex) throws AssertionError { 200 CCSMAssert.isTrue(startIndex >= 0, "startIndex must be greater or equal to zero"); 201 CCSMAssert.isTrue(endIndex <= tokens.size(), "endIndex must be less or equal to tokens.size()"); 202 } 203 204 /** 205 * Returns <code>true</code> if the list of tokens contains a token of 206 * <code>tokenType<code>, false otherwise. 207 */ 208 public static boolean contains(List<IToken> tokens, ETokenType tokenType) { 209 return contains(tokens, 0, tokens.size(), tokenType); 210 } 211 212 /** 213 * Returns <code>true</code> if the list of tokens contains a token of 214 * <code>tokenType<code> in the given range, false otherwise. 215 */ 216 public static boolean contains(List<IToken> tokens, int startIndex, int endIndex, ETokenType tokenType) { 217 return firstTokenOfType(tokens, startIndex, endIndex, tokenType) != NOT_FOUND; 218 } 219 220 /** 221 * Returns <code>true</code> if the list of tokens contains a token of 222 * <code>tokenClass<code>, false otherwise. 223 */ 224 public static boolean contains(List<IToken> tokens, ETokenClass tokenClass) { 225 for (IToken token : tokens) { 226 if (token.getType().getTokenClass() == tokenClass) { 227 return true; 228 } 229 } 230 return false; 231 } 232 233 /** 234 * Returns <code>true</code> if the list of tokens contains a any token of given 235 * <code>tokenTypes<code>, false otherwise. 236 */ 237 public static boolean containsAny(List<IToken> tokens, ETokenType... tokenTypes) { 238 return containsAny(tokens, 0, tokens.size(), tokenTypes); 239 } 240 241 /** 242 * Returns <code>true</code> if the given token stream contains one of the 243 * tokens in the given set. 244 */ 245 public static boolean containsAny(List<IToken> tokens, EnumSet<ETokenType> tokenTypeSet) { 246 return containsAny(tokens, 0, tokens.size(), tokenTypeSet); 247 } 248 249 /** 250 * Returns <code>true</code> if the list of tokens contains a any token of given 251 * <code>tokenTypes<code> in the given range, false otherwise. 252 */ 253 public static boolean containsAny(List<IToken> tokens, int startIndex, int endIndex, ETokenType... tokenTypes) { 254 return containsAny(tokens, startIndex, endIndex, toEnumSet(tokenTypes)); 255 } 256 257 /** 258 * Returns <code>true</code> if the list of tokens contains a any token of given 259 * <code>tokenTypes<code> in the given range, false otherwise. 260 */ 261 public static boolean containsAny(List<IToken> tokens, int startIndex, int endIndex, 262 EnumSet<ETokenType> tokenTypes) { 263 assertListRange(tokens, startIndex, endIndex); 264 if (tokenTypes.isEmpty()) { 265 return false; 266 } 267 268 for (int i = startIndex; i < endIndex; ++i) { 269 if (tokenTypes.contains(tokens.get(i).getType())) { 270 return true; 271 } 272 } 273 return false; 274 } 275 276 /** 277 * Returns <code>true</code> if the list of tokens contains at least one token 278 * of each of the given <code>tokenTypes<code>, false otherwise. 279 */ 280 public static boolean containsAll(List<IToken> tokens, ETokenType... tokenTypes) { 281 return containsAll(tokens, 0, tokens.size(), tokenTypes); 282 } 283 284 /** 285 * Returns <code>true</code> if the list of tokens contains at least one token 286 * of each of the given <code>tokenTypes<code> in the given range, false 287 * otherwise. 288 */ 289 public static boolean containsAll(List<IToken> tokens, int startIndex, int endIndex, ETokenType... tokenTypes) { 290 assertListRange(tokens, startIndex, endIndex); 291 292 if (tokenTypes.length == 0) { 293 return true; 294 } 295 296 EnumSet<ETokenType> types = toEnumSet(tokenTypes); 297 for (int i = startIndex; i < endIndex; ++i) { 298 types.remove(tokens.get(i).getType()); 299 if (types.isEmpty()) { 300 return true; 301 } 302 } 303 return false; 304 } 305 306 /** 307 * Returns <code>true</code> if the list of tokens contains a consecutive 308 * subsequence of tokens that match the sequence of token types., false 309 * otherwise. 310 */ 311 public static boolean containsSequence(List<IToken> tokens, int startIndex, int endIndex, 312 ETokenType... tokenTypes) { 313 assertListRange(tokens, startIndex, endIndex); 314 315 OUTER: for (int i = startIndex; i <= endIndex - tokenTypes.length; ++i) { 316 for (int j = 0; j < tokenTypes.length; ++j) { 317 if (tokens.get(i + j).getType() != tokenTypes[j]) { 318 continue OUTER; 319 } 320 } 321 return true; 322 } 323 return false; 324 } 325 326 /** 327 * Returns <code>true</code> if the list of tokens contains a consecutive 328 * subsequence of tokens that match the sequence of token types., false 329 * otherwise. 330 */ 331 public static boolean containsSequence(List<IToken> tokens, ETokenType... tokenTypes) { 332 return containsSequence(tokens, 0, tokens.size(), tokenTypes); 333 } 334 335 /** 336 * Returns the sublist of tokens between the first occurrence of given start 337 * token type and the first occurrence of end token type (after start). If one 338 * of them is not found, an empty list is returned. The tokens for the start and 339 * end are not included in the returned sub list. 340 */ 341 public static List<IToken> tokensBetween(List<IToken> tokens, ETokenType startType, ETokenType endType) { 342 return tokensBetween(tokens, EnumSet.of(startType), EnumSet.of(endType)); 343 } 344 345 /** 346 * Returns the sublist of tokens between the first occurrence of some given 347 * start token types and the first occurrence of any of the given end token 348 * types (after start). If no matches are found, an empty list is returned. The 349 * tokens for the start and end are not included in the returned sub list. 350 */ 351 public static List<IToken> tokensBetween(List<IToken> tokens, EnumSet<ETokenType> startTypes, 352 EnumSet<ETokenType> endTypes) { 353 int start = TokenStreamUtils.firstTokenOfType(tokens, startTypes.toArray(new ETokenType[0])); 354 if (start == NOT_FOUND) { 355 return CollectionUtils.emptyList(); 356 } 357 start += 1; 358 359 int end = TokenStreamUtils.firstTokenOfType(tokens, start, endTypes.toArray(new ETokenType[0])); 360 if (end == NOT_FOUND) { 361 return CollectionUtils.emptyList(); 362 } 363 364 return tokens.subList(start, end); 365 } 366 367 /** 368 * Returns the offset of the first token of closingType, thereby counting 369 * nesting occurrences of openingType and closingType. Returns 370 * {@link #NOT_FOUND} if the token before currentOffset isn't equal to the 371 * openingType or if no token with closingType was found. 372 * 373 * @param currentOffset 374 * this is the offset to start the search and must be one token 375 * <b>after</b> the opening token for which the closing token shall 376 * be found. 377 */ 378 public static int findMatchingClosingToken(List<IToken> tokens, int currentOffset, ETokenType openingType, 379 ETokenType closingType) { 380 int nesting = 1; 381 for (; currentOffset < tokens.size(); ++currentOffset) { 382 ETokenType tokenType = tokens.get(currentOffset).getType(); 383 if (tokenType == openingType) { 384 nesting += 1; 385 } else if (tokenType == closingType) { 386 nesting -= 1; 387 if (nesting == 0) { 388 return currentOffset; 389 } 390 } 391 } 392 return NOT_FOUND; 393 } 394 395 /** 396 * Returns the offset of the first token of closingType, thereby counting 397 * nesting occurrences of openingType and closingType. Returns 398 * {@link #NOT_FOUND} if none was found. 399 * 400 * @param currentOffset 401 * this is the offset to start the search and must be one token 402 * <b>after</b> the opening token for which the closing token shall 403 * be found. 404 */ 405 public static int findMatchingClosingToken(List<IToken> tokens, int currentOffset, EnumSet<ETokenType> openingTypes, 406 EnumSet<ETokenType> closingTypes) { 407 int nesting = 1; 408 for (; currentOffset < tokens.size(); ++currentOffset) { 409 ETokenType tokenType = tokens.get(currentOffset).getType(); 410 if (openingTypes.contains(tokenType)) { 411 nesting += 1; 412 } else if (closingTypes.contains(tokenType)) { 413 nesting -= 1; 414 if (nesting == 0) { 415 return currentOffset; 416 } 417 } 418 } 419 return NOT_FOUND; 420 } 421 422 /** 423 * Returns the offset of the first token of openingType, thereby counting 424 * nesting occurrences of openingType and closingType. Returns 425 * {@link #NOT_FOUND} if none was found. 426 * 427 * @param currentOffset 428 * this is the offset to start the search and must be one token 429 * <b>before</b> the closing token for which the opening token shall 430 * be found. 431 */ 432 public static int findMatchingOpeningToken(List<IToken> tokens, int currentOffset, ETokenType openingType, 433 ETokenType closingType) { 434 int openBraces = 1; 435 for (; currentOffset >= 0; currentOffset--) { 436 ETokenType currentTokenType = tokens.get(currentOffset).getType(); 437 if (currentTokenType.equals(openingType)) { 438 openBraces--; 439 } else if (currentTokenType.equals(closingType)) { 440 openBraces++; 441 } 442 if (openBraces == 0) { 443 return currentOffset; 444 } 445 } 446 return NOT_FOUND; 447 } 448 449 /** 450 * Returns the index of the first top-level token that has the given search type 451 * regarding the nesting introduced by the opening and closing types. If no such 452 * token is found {@link #NOT_FOUND} is returned. 453 */ 454 public static int findFirstTopLevel(List<IToken> tokens, ETokenType searchType, List<ETokenType> openingTypes, 455 List<ETokenType> closingTypes) { 456 return findFirstTopLevel(tokens, EnumSet.of(searchType), openingTypes, closingTypes); 457 } 458 459 /** 460 * Returns the index of the first top-level token that has one of the given 461 * search types regarding the nesting introduced by the opening and closing 462 * types. If no such token is found {@link #NOT_FOUND} is returned. 463 */ 464 public static int findFirstTopLevel(List<IToken> tokens, EnumSet<ETokenType> searchTypes, 465 List<ETokenType> openingTypes, List<ETokenType> closingTypes) { 466 return findFirstTopLevel(tokens, 0, searchTypes, openingTypes, closingTypes); 467 } 468 469 /** 470 * Returns the index of the first top-level token starting at the given index 471 * that has one of the given search types regarding the nesting introduced by 472 * the opening and closing types. If no such token is found {@link #NOT_FOUND} 473 * is returned. 474 */ 475 public static int findFirstTopLevel(List<IToken> tokens, int startIndex, EnumSet<ETokenType> searchTypes, 476 List<ETokenType> openingTypes, List<ETokenType> closingTypes) { 477 return findFirstTopLevel(tokens, startIndex, token -> searchTypes.contains(token.getType()), openingTypes, 478 closingTypes); 479 } 480 481 /** 482 * Returns the index of the first top-level token starting at the given index 483 * that matches the given token predicate regarding the nesting introduced by 484 * the opening and closing types. If no such token is found {@link #NOT_FOUND} 485 * is returned. 486 */ 487 public static int findFirstTopLevel(List<IToken> tokens, int startIndex, Predicate<IToken> searchPredicate, 488 List<ETokenType> openingTypes, List<ETokenType> closingTypes) { 489 CCSMAssert.isTrue(startIndex >= 0, "startIndex must be greater or equal to zero"); 490 491 for (NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, startIndex, openingTypes, 492 closingTypes); iterator.hasNext();) { 493 IToken token = iterator.next(); 494 if (iterator.isTopLevel() && searchPredicate.test(token)) { 495 return iterator.getCurrentIndex(); 496 } 497 } 498 return NOT_FOUND; 499 } 500 501 /** 502 * Returns the index of the first top-level token starting at the given index 503 * that matches the given token predicate regarding the nesting introduced by 504 * the opening and closing types. If no such token is found {@link #NOT_FOUND} 505 * is returned. 506 */ 507 public static int findFirstTopLevelWithIndexPredicate(List<IToken> tokens, int startIndex, 508 Predicate<Integer> searchPredicate, List<ETokenType> openingTypes, List<ETokenType> closingTypes) { 509 CCSMAssert.isTrue(startIndex >= 0, "startIndex must be greater or equal to zero"); 510 511 for (NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, startIndex, openingTypes, 512 closingTypes); iterator.hasNext();) { 513 iterator.next(); 514 if (iterator.isTopLevel() && searchPredicate.test(iterator.getCurrentIndex())) { 515 return iterator.getCurrentIndex(); 516 } 517 } 518 return NOT_FOUND; 519 } 520 521 /** 522 * Returns whether the next tokens starting at the given offset are of given 523 * type. 524 * 525 * @deprecated use {@link #hasTokenTypeSequence(List, int, ETokenType...)} 526 * instead. I leave the method here to avoid hidden merge conflicts. 527 * Can be removed after some releases. 528 */ 529 @Deprecated 530 public static boolean tokenTypesAt(List<IToken> tokens, int startOffset, ETokenType... tokenTypes) { 531 return hasTokenTypeSequence(tokens, startOffset, tokenTypes); 532 } 533 534 /** 535 * Returns whether the given token list starts with given token type sequence. 536 */ 537 public static boolean startsWith(List<IToken> tokens, ETokenType... sequence) { 538 return hasTokenTypeSequence(tokens, 0, sequence); 539 } 540 541 /** 542 * Returns whether the given token list ends with given token type sequence. 543 */ 544 public static boolean endsWith(List<IToken> tokens, ETokenType... sequence) { 545 return hasTokenTypeSequence(tokens, tokens.size() - sequence.length, sequence); 546 } 547 548 /** Counts the number of tokens of a given type in the list of tokens */ 549 public static int count(List<IToken> tokens, ETokenType tokenType) { 550 int counter = 0; 551 for (IToken token : tokens) { 552 if (token.getType().equals(tokenType)) { 553 counter++; 554 } 555 } 556 return counter; 557 } 558 559 /** 560 * Counts the number of tokens of a given type in the list of tokens that are 561 * not nested between the given nesting tokens. 562 */ 563 public static int countWithoutNesting(List<IToken> tokens, ETokenType tokenType, ETokenType openingType, 564 ETokenType closingType) { 565 return countWithoutNesting(tokens, tokenType, Arrays.asList(openingType), Arrays.asList(closingType)); 566 } 567 568 /** 569 * Counts the number of tokens of a given type in the list of tokens that are 570 * not nested between the given nesting tokens. 571 */ 572 public static int countWithoutNesting(List<IToken> tokens, ETokenType tokenType, List<ETokenType> openingType, 573 List<ETokenType> closingType) { 574 int counter = 0; 575 for (NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, 0, openingType, 576 closingType); iterator.hasNext();) { 577 IToken token = iterator.next(); 578 if (iterator.isTopLevel() && token.getType().equals(tokenType)) { 579 counter++; 580 } 581 } 582 return counter; 583 } 584 585 /** 586 * Returns all tokens of the given token types in the given token list. 587 */ 588 public static List<IToken> findAllTokens(List<IToken> tokens, EnumSet<ETokenType> types) { 589 List<IToken> result = new ArrayList<>(); 590 for (Integer index : findAll(tokens, 0, tokens.size(), types)) { 591 result.add(tokens.get(index)); 592 } 593 return result; 594 } 595 596 /** 597 * Returns all indices of the given token types in the given token list. 598 */ 599 public static List<Integer> findAll(List<IToken> tokens, EnumSet<ETokenType> types) { 600 return findAll(tokens, 0, tokens.size(), types); 601 } 602 603 /** 604 * Returns all indices of the given token types in the given token list, 605 * beginning from startOffset (inclusive) to endOffset (exclusive). 606 */ 607 public static List<Integer> findAll(List<IToken> tokens, int startOffset, int endOffset, 608 EnumSet<ETokenType> types) { 609 CCSMAssert.isTrue(startOffset >= 0, "startOffset must be greater or equal to zero"); 610 CCSMAssert.isTrue(endOffset <= tokens.size(), "endOffset must be less or equal to tokens.size()"); 611 612 List<Integer> indices = new ArrayList<>(); 613 for (int i = startOffset; i < endOffset; i++) { 614 if (types.contains(tokens.get(i).getType())) { 615 indices.add(i); 616 } 617 } 618 return indices; 619 } 620 621 /** Splits the given token stream at the tokens of the given types. */ 622 public static List<List<IToken>> split(List<IToken> tokens, EnumSet<ETokenType> splitTypes) { 623 return split(tokens, splitTypes, Integer.MAX_VALUE); 624 } 625 626 /** Splits the given token stream at the tokens of the given types. */ 627 public static List<List<IToken>> split(List<IToken> tokens, ETokenType... splitTypes) { 628 return split(tokens, toEnumSet(splitTypes), Integer.MAX_VALUE); 629 } 630 631 /** 632 * Splits the given token stream at the tokens of the given types, but at most 633 * as often as given in the limit parameter. The resulting list will have at 634 * most <code>limit</code> entries). The limit must be bigger than 0. 635 */ 636 public static List<List<IToken>> split(List<IToken> tokens, EnumSet<ETokenType> splitTypes, int limit) { 637 Predicate<IToken> splitTokenMatcher = token -> splitTypes.contains(token.getType()); 638 return split(tokens, splitTokenMatcher, limit); 639 } 640 641 /** 642 * Splits the given token stream at the tokens matching the given predicate, but 643 * at most as often as given in the limit parameter. The resulting list will 644 * have at most <code>limit</code> entries). The limit must be bigger than 0. 645 */ 646 public static List<List<IToken>> split(List<IToken> tokens, Predicate<IToken> splitTokenMatcher, int limit) 647 throws AssertionError { 648 CCSMAssert.isTrue(limit > 0, "The limit must be greater than zero"); 649 List<List<IToken>> parts = new ArrayList<>(); 650 int start = 0; 651 652 for (int i = 0; i < tokens.size(); i++) { 653 if (parts.size() == limit - 1) { 654 break; 655 } 656 if (splitTokenMatcher.test(tokens.get(i))) { 657 List<IToken> part = tokens.subList(start, i); 658 parts.add(part); 659 start = i + 1; 660 } 661 } 662 parts.add(tokens.subList(start, tokens.size())); 663 return parts; 664 } 665 666 /** 667 * Returns whether the given token list contains tokens with the given types in 668 * that order starting at the given start offset. 669 * 670 * This method returns false when called with invalid offsets. 671 */ 672 public static boolean hasTokenTypeSequence(List<IToken> tokens, int startOffset, ETokenType... sequence) { 673 if (startOffset + sequence.length > tokens.size() || startOffset < 0) { 674 return false; 675 } 676 677 for (int i = 0; i < sequence.length; i++) { 678 if (!tokens.get(i + startOffset).getType().equals(sequence[i])) { 679 return false; 680 } 681 } 682 return true; 683 } 684 685 /** 686 * Returns whether an index of the given token list matches the given 687 * {@link BiPredicate} (which gets the token index and the given token list). 688 */ 689 public static boolean containsTokenIndexPredicate(List<IToken> tokens, 690 BiPredicate<Integer, List<IToken>> predicate) { 691 return firstTokenMatchingIndexPredicate(tokens, 0, predicate) != NOT_FOUND; 692 } 693 694 /** 695 * Returns the first index of the given token list that matches the given 696 * {@link BiPredicate} (which gets the token index and the given token list). 697 * This method iterates over the indices starting with the given startIndex. 698 */ 699 public static int firstTokenMatchingIndexPredicate(List<IToken> tokens, int startIndex, 700 BiPredicate<Integer, List<IToken>> predicate) { 701 for (int i = startIndex; i < tokens.size(); i++) { 702 if (predicate.test(i, tokens)) { 703 return i; 704 } 705 } 706 return NOT_FOUND; 707 } 708 709 /** 710 * Returns the index (between the given start and end indices) of the first 711 * token that starts a sequence of the given token types. If no such token is 712 * found {@link #NOT_FOUND} is returned. 713 */ 714 public static int firstTokenOfTypeSequence(List<IToken> tokens, int startOffset, int endIndex, 715 ETokenType... sequence) { 716 for (int i = startOffset; i < endIndex; i++) { 717 if (hasTokenTypeSequence(tokens, i, sequence)) { 718 return i; 719 } 720 } 721 return NOT_FOUND; 722 } 723 724 /** 725 * Returns the indices of all tokens that starts a sequence of the given token 726 * types (and are between startOffset and endIndex). If no such token is found, 727 * an empty List is returned. 728 */ 729 public static List<Integer> allStartingIndicesOfTypeSequence(List<IToken> tokens, int startOffset, int endIndex, 730 ETokenType... sequence) { 731 List<Integer> startingIndices = new ArrayList<>(); 732 for (int i = startOffset; i < endIndex; i++) { 733 if (hasTokenTypeSequence(tokens, i, sequence)) { 734 startingIndices.add(i); 735 } 736 } 737 return startingIndices; 738 } 739 740 /** 741 * Returns the index of the first occurrence of the given sequence of token 742 * types in the given token list, beginning from the given start offset. If the 743 * sequence if not found, {@link #NOT_FOUND} is returned. 744 */ 745 public static int firstTokenOfTypeSequence(List<IToken> tokens, int startOffset, ETokenType... sequence) { 746 return firstTokenOfTypeSequence(tokens, startOffset, tokens.size() - sequence.length + 1, sequence); 747 } 748 749 /** 750 * Returns all indices of occurrences of the given sequence of token types in 751 * the given token list, beginning from the given offset. If the sequence is not 752 * found, an empty list is returned. 753 */ 754 public static List<Integer> firstTokenOfTypeSequences(List<IToken> tokens, int offset, ETokenType... sequence) { 755 List<Integer> indices = new ArrayList<>(); 756 757 while (offset < tokens.size() - sequence.length + 1) { 758 offset = firstTokenOfTypeSequence(tokens, offset, sequence); 759 if (offset == NOT_FOUND) { 760 break; 761 } 762 indices.add(offset); 763 offset++; 764 } 765 766 return indices; 767 } 768 769 /** 770 * Returns whether the given list's token types equal the given token types. 771 */ 772 public static boolean hasTypes(List<IToken> tokens, ETokenType... types) { 773 if (tokens.size() != types.length) { 774 return false; 775 } 776 777 return hasTokenTypeSequence(tokens, 0, types); 778 } 779 780 /** 781 * Returns all tokens between the tokens of the opening and closing types 782 * regarding nesting. 783 */ 784 public static List<IToken> tokensBetweenWithNesting(List<IToken> tokens, ETokenType openingType, 785 ETokenType closingType) { 786 return tokensBetweenWithNesting(tokens, 0, openingType, closingType); 787 } 788 789 /** 790 * Returns all tokens between the tokens of the opening and closing types 791 * regarding nesting beginning from the given start offset. 792 */ 793 public static List<IToken> tokensBetweenWithNesting(List<IToken> tokens, int startOffset, ETokenType openingType, 794 ETokenType closingType) { 795 int openingIndex = firstTokenOfType(tokens, startOffset, openingType); 796 if (openingIndex == NOT_FOUND) { 797 return CollectionUtils.emptyList(); 798 } 799 800 openingIndex += 1; 801 int closingIndex = findMatchingClosingToken(tokens, openingIndex, openingType, closingType); 802 if (closingIndex == NOT_FOUND) { 803 return CollectionUtils.emptyList(); 804 } 805 806 return tokens.subList(openingIndex, closingIndex); 807 } 808 809 /** 810 * Splits the given token list at the tokens of the given split type. If the 811 * split type is nested between the open and close tokens, it is ignored. 812 */ 813 public static List<List<IToken>> splitWithNesting(List<IToken> tokens, ETokenType splitType, ETokenType openingType, 814 ETokenType closingType) { 815 return splitWithNesting(tokens, splitType, Arrays.asList(openingType), Arrays.asList(closingType)); 816 } 817 818 /** 819 * Splits the given token list at the tokens of the given split type. If the 820 * split type is nested between one of the open and close tokens, it is ignored. 821 * The open and close token type lists must have the same size. None of both 822 * must contain duplicates. 823 */ 824 public static List<List<IToken>> splitWithNesting(List<IToken> tokens, ETokenType splitType, 825 List<ETokenType> openingTypes, List<ETokenType> closingTypes) { 826 return splitWithNesting(tokens, EnumSet.of(splitType), openingTypes, closingTypes, Integer.MAX_VALUE); 827 } 828 829 /** 830 * Splits the given token list at the tokens of the given split types. If one of 831 * the split types is nested between one of the open and close tokens, it is 832 * ignored. The resulting list will have at most limit entries. Limit must be 833 * greater zero. The open and close token type lists must have the same size. 834 * None of both must contain duplicates. 835 */ 836 public static List<List<IToken>> splitWithNesting(List<IToken> tokens, EnumSet<ETokenType> splitTypes, 837 List<ETokenType> openingTypes, List<ETokenType> closingTypes, int limit) { 838 CCSMAssert.isTrue(limit > 0, "The limit must be greater than zero"); 839 840 List<List<IToken>> parts = new ArrayList<>(); 841 int start = 0; 842 for (NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, 0, openingTypes, 843 closingTypes); iterator.hasNext();) { 844 IToken token = iterator.next(); 845 if (parts.size() == limit - 1) { 846 break; 847 } 848 if (iterator.isTopLevel() && splitTypes.contains(token.getType())) { 849 List<IToken> part = tokens.subList(start, iterator.getCurrentIndex()); 850 parts.add(part); 851 start = iterator.getCurrentIndex() + 1; 852 } 853 } 854 855 parts.add(tokens.subList(start, tokens.size())); 856 return parts; 857 } 858 859 /** 860 * Creates a new token from the given referenceToken. 861 * 862 * @param referenceToken 863 * Reference token, whose offset, line number and origin id will be 864 * copied to the new token. 865 * @param text 866 * Text of the newly created token. 867 * @param type 868 * Type of the newly created token. 869 * @return New token with the given text and type. 870 */ 871 public static IToken createToken(IToken referenceToken, String text, ETokenType type) { 872 return referenceToken.newToken(type, referenceToken.getOffset(), referenceToken.getLineNumber(), text, 873 referenceToken.getOriginId()); 874 } 875 876 /** 877 * Creates a new token list copying the given list and using the given reference 878 * token. 879 * 880 * @param referenceToken 881 * Reference token, whose offset, line number and origin id will be 882 * copied to the new tokens. 883 * @param tokens 884 * List of tokens to be duplicated. 885 */ 886 public static List<IToken> copyTokens(IToken referenceToken, List<IToken> tokens) { 887 return CollectionUtils.map(tokens, 888 token -> TokenStreamUtils.createToken(referenceToken, token.getText(), token.getType())); 889 } 890 891 /** 892 * Adds a new token to the given token list at the given position with the given 893 * type and text. The token that is at that position currently is taken as a 894 * reference. The new token will have the same offset, origin ID and line number 895 * as the reference token. 896 * 897 * @param tokens 898 * The token list to modify. Must not be unmodifiable or empty. 899 * @param position 900 * The position at which to insert the new token. Also the position 901 * of the reference token. If this is equal to the size of the token 902 * list, the new token will be appended at the end and the last token 903 * will be taken as the reference token. 904 * @param type 905 * The token type of the new token. 906 * @param text 907 * The text of the new token. 908 */ 909 public static void addToken(List<IToken> tokens, int position, ETokenType type, String text) { 910 CCSMAssert.isFalse(tokens.isEmpty(), 911 "Cannot create new tokens without at least one existing token as a reference"); 912 int referencePosition = position; 913 if (position == tokens.size()) { 914 referencePosition--; 915 } 916 IToken referenceToken = tokens.get(referencePosition); 917 IToken newToken = createToken(referenceToken, text, type); 918 tokens.add(position, newToken); 919 } 920 921 /** 922 * Removes the last token of the given token list if it matches the specified 923 * token type. 924 * 925 * @param tokens 926 * The token list to modify. 927 * @param type 928 * Token type, which should be removed. 929 * @return The token list without the given token type at the end. 930 */ 931 public static List<IToken> removeAtEnd(List<IToken> tokens, ETokenType type) { 932 if (endsWith(tokens, type)) { 933 tokens = tokens.subList(0, tokens.size() - 1); 934 } 935 return tokens; 936 } 937 938 /** 939 * Removes the first token of the given token list if it matches the specified 940 * token type. 941 * 942 * @param tokens 943 * The token list to modify. 944 * @param type 945 * Token type, which should be removed. 946 * @return The token list without the given token type at the beginning. 947 */ 948 public static List<IToken> removeAtFront(List<IToken> tokens, ETokenType type) { 949 if (startsWith(tokens, type)) { 950 tokens = CollectionUtils.getRest(tokens); 951 } 952 return tokens; 953 } 954 955 /** 956 * Searches for the first token with the given type and text (ignoring case). 957 * 958 * @param tokens 959 * The list of tokens to be searched. 960 * @param tokenText 961 * The searched token should have this text (ignoring case). 962 * @param tokenTypes 963 * The searched token should have one of these types. 964 * @return The first token that matches, null if none is found. 965 */ 966 public static IToken getTokenByTypeAndText(List<IToken> tokens, String tokenText, Set<ETokenType> tokenTypes) { 967 for (IToken token : tokens) { 968 if (tokenTypes.contains(token.getType()) && token.getText().equalsIgnoreCase(tokenText)) { 969 return token; 970 } 971 } 972 return null; 973 } 974 975 /** 976 * Filters the tokens by type and returns the text of each filtered token. 977 * 978 * @param tokens 979 * The list of tokens to be searched. 980 * @param tokenType 981 * The type for which filtering is performed. 982 * @return A list containing the texts of all tokens of the passed type. Empty 983 * list if no token was found. 984 */ 985 public static List<String> getTokenTextsByType(List<IToken> tokens, ETokenType tokenType) { 986 List<String> result = new ArrayList<>(); 987 for (IToken token : tokens) { 988 if (token.getType() == tokenType) { 989 result.add(token.getText()); 990 } 991 } 992 return result; 993 } 994 995 /** 996 * Binary search for an {@link IToken} in the given list that has exactly the 997 * given offset ({@link IToken#getOffset()}). Assumes that the tokens in the 998 * given list are ordered by offset. Returns the index of the found token in the 999 * given list or {@value #NOT_FOUND} if no such token is found. 1000 * 1001 * This also works on token lists where offsets are not *strictly* increasing 1002 * (e.g., due to preprocessor expansions). In this case, returns the index of 1003 * the first token that has the given offset. 1004 * 1005 * @param tokens 1006 * The list of tokens in which to search 1007 * @param offset 1008 * The offset for which the token is to find 1009 * @return index of the token in the list or {@value #NOT_FOUND} 1010 */ 1011 public static int indexOfByOffset(List<IToken> tokens, int offset) { 1012 int leftSearchBoundary = 0; 1013 int rightSearchBoundary = tokens.size() - 1; 1014 int matchingIndex = NOT_FOUND; 1015 while (leftSearchBoundary <= rightSearchBoundary) { 1016 int currentIndex = (leftSearchBoundary + rightSearchBoundary) / 2; 1017 if (tokens.get(currentIndex).getOffset() < offset) { 1018 leftSearchBoundary = currentIndex + 1; 1019 } else if (tokens.get(currentIndex).getOffset() > offset) { 1020 rightSearchBoundary = currentIndex - 1; 1021 } else { 1022 matchingIndex = currentIndex; 1023 break; 1024 } 1025 } 1026 if (matchingIndex == NOT_FOUND) { 1027 return NOT_FOUND; 1028 } 1029 // backtrack to first index that matches the offset (might be more than one 1030 // consecutive token in presence of preprocessor expansions). 1031 while (matchingIndex > 0 && tokens.get(matchingIndex - 1).getOffset() == offset) { 1032 matchingIndex--; 1033 } 1034 return matchingIndex; 1035 } 1036 1037 /** 1038 * Returns the indices of the tokens in the given list that satisfy the given 1039 * predicate. 1040 */ 1041 public static List<Integer> indicesOfByPredicate(List<IToken> tokens, Predicate<IToken> includeToken) { 1042 List<Integer> indices = new ArrayList<>(); 1043 for (int i = 0; i < tokens.size(); i++) { 1044 if (includeToken.test(tokens.get(i))) { 1045 indices.add(i); 1046 } 1047 } 1048 return indices; 1049 } 1050 1051 /** 1052 * Returns the {@link ELanguage} of the given tokens (assuming all tokens have 1053 * the same language). Returns <code>null</code> if the given collection is 1054 * empty. 1055 */ 1056 public static ELanguage getLanguage(Collection<IToken> tokens) { 1057 if (tokens.isEmpty()) { 1058 return null; 1059 } 1060 return CollectionUtils.getAny(tokens).getLanguage(); 1061 } 1062 1063 /** 1064 * Returns a list of tokens existing in the same line of a given token, which 1065 * specified by its index. 1066 */ 1067 public static List<IToken> getTokensInSameLine(List<IToken> tokens, int index) { 1068 int tokenLineNumber = tokens.get(index).getLineNumber(); 1069 int leftIndex = index, rightIndex = index; 1070 int size = tokens.size() - 1; 1071 while (leftIndex > 0 && tokens.get(leftIndex - 1).getLineNumber() == tokenLineNumber) { 1072 leftIndex--; 1073 } 1074 while (rightIndex < size && tokens.get(rightIndex + 1).getLineNumber() == tokenLineNumber) { 1075 rightIndex++; 1076 } 1077 return tokens.subList(leftIndex, rightIndex + 1); 1078 } 1079 1080 /** 1081 * Checks if the token is the start of a new statement. 1082 * 1083 * @param token 1084 * The current token. 1085 * @param lastToken 1086 * The previous token. 1087 * @param continueTokens 1088 * Token types that force a statement to continue even in a new line. 1089 * @param nonContinuationTokens 1090 * Token types of class OPERATOR that do not force a statement 1091 * continuation. 1092 * @param newStatementToken 1093 * Token that indicates the start of a new statement. 1094 * @return True if it is the start of a new statement otherwise false. 1095 */ 1096 public static boolean startsNewStatement(IToken token, IToken lastToken, EnumSet<ETokenType> continueTokens, 1097 EnumSet<ETokenType> nonContinuationTokens, ETokenType newStatementToken) { 1098 1099 if (lastToken == null) { 1100 return false; 1101 } 1102 1103 if (token == null) { 1104 return true; 1105 } 1106 1107 ETokenType tokenType = token.getType(); 1108 ETokenType lastTokenType = lastToken.getType(); 1109 1110 if (tokenType == newStatementToken) { 1111 return true; 1112 } 1113 1114 // E.g. 1115 // a++ 1116 // b 1117 // should be treated as two statements. 1118 // a && 1119 // b 1120 // should be treated as one statement. 1121 1122 if (!nonContinuationTokens.contains(lastTokenType) && lastTokenType.getTokenClass() == ETokenClass.OPERATOR) { 1123 return false; 1124 } 1125 1126 if (continueTokens.contains(tokenType) || continueTokens.contains(lastTokenType)) { 1127 return false; 1128 } 1129 1130 /* 1131 * If there is no reason to continue the statement in the next line, end the 1132 * statement once the new token is on a new line. 1133 */ 1134 return lastToken.getLineNumber() < token.getLineNumber(); 1135 } 1136 1137}