Source code

001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser;
018
019import java.util.ArrayList;
020import java.util.Arrays;
021import java.util.Collection;
022import java.util.EnumSet;
023import java.util.List;
024import java.util.Set;
025import java.util.function.BiPredicate;
026import java.util.function.Predicate;
027
028import org.conqat.lib.commons.assertion.CCSMAssert;
029import org.conqat.lib.commons.collections.CollectionUtils;
030
031import com.google.common.base.Preconditions;
032
033import eu.cqse.check.framework.scanner.ELanguage;
034import eu.cqse.check.framework.scanner.ETokenType;
035import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
036import eu.cqse.check.framework.scanner.IToken;
037
038/**
039 * Utility methods for {@link IToken} lists.
040 */
041public class TokenStreamUtils {
042
043        /** The value returned if nothing was found in the various find methods. */
044        public static final int NOT_FOUND = -1;
045
046        /**
047         * Finds the index of the first token in the token list that has any of the of
048         * given token types.
049         * 
050         * <p>
051         * For finding sequences, see this method:
052         * {@link #firstTokenOfTypeSequence(List, int, int, ETokenType...)}
053         * 
054         * @return integer index (or {@link #NOT_FOUND}).
055         */
056        public static int firstTokenOfType(List<IToken> tokens, ETokenType... tokenTypes) {
057                return firstTokenOfType(tokens, 0, tokenTypes);
058        }
059
060        /**
061         * Finds the index of the first token in the token list after startIndex that
062         * has any of the of given token types.
063         * 
064         * <p>
065         * For finding sequences, see this method:
066         * {@link #firstTokenOfTypeSequence(List, int, int, ETokenType...)}
067         * 
068         * @return integer index (or {@link #NOT_FOUND}).
069         */
070        public static int firstTokenOfType(List<IToken> tokens, int startIndex, ETokenType... tokenTypes) {
071                return firstTokenOfType(tokens, startIndex, tokens.size(), tokenTypes);
072        }
073
074        /**
075         * Finds the index of the first token in the token list (not before startIndex
076         * and before endIndex) that has any of the of given token types.
077         * <p>
078         * For finding sequences, see this method:
079         * {@link #firstTokenOfTypeSequence(List, int, int, ETokenType...)}
080         * 
081         * @return integer index (or {@link #NOT_FOUND}).
082         */
083        public static int firstTokenOfType(List<IToken> tokens, int startIndex, int endIndex, ETokenType... tokenTypes) {
084                EnumSet<ETokenType> set = toEnumSet(tokenTypes);
085                return firstTokenOfType(tokens, startIndex, endIndex, set);
086        }
087
088        /**
089         * Finds the index of the first token in the token list (not before startIndex
090         * and before endIndex) that has any of the of given token types.
091         * <p>
092         * For finding sequences, see this method:
093         * {@link #firstTokenOfTypeSequence(List, int, int, ETokenType...)}
094         *
095         * @return integer index (or {@link #NOT_FOUND}).
096         */
097        public static int firstTokenOfType(List<IToken> tokens, int startIndex, int endIndex, EnumSet<ETokenType> set) {
098                assertListRange(tokens, startIndex, endIndex);
099                for (int i = startIndex; i < endIndex; ++i) {
100                        if (set.contains(tokens.get(i).getType())) {
101                                return i;
102                        }
103                }
104                return NOT_FOUND;
105        }
106
107        /**
108         * Converts the given token types array into an enum set.
109         */
110        private static EnumSet<ETokenType> toEnumSet(ETokenType... tokenTypes) {
111                EnumSet<ETokenType> set = EnumSet.noneOf(ETokenType.class);
112                set.addAll(Arrays.asList(tokenTypes));
113                return set;
114        }
115
116        /**
117         * Returns the index of the last token that has any of the given token types (or
118         * {@link #NOT_FOUND}).
119         */
120        public static int lastTokenOfType(List<IToken> tokens, ETokenType... tokenTypes) {
121                return lastTokenOfType(tokens, 0, tokenTypes);
122        }
123
124        /**
125         * Returns the index of the last token that has any of the given token types (or
126         * {@link #NOT_FOUND}).
127         */
128        public static int lastTokenOfType(List<IToken> tokens, EnumSet<ETokenType> tokenTypes) {
129                return lastTokenOfType(tokens, 0, tokenTypes);
130        }
131
132        /**
133         * Returns the index of the last token that has any of the given token types not
134         * before the start index (or {@link #NOT_FOUND}).
135         */
136        public static int lastTokenOfType(List<IToken> tokens, int startIndex, ETokenType... tokenTypes) {
137                return lastTokenOfType(tokens, startIndex, tokens.size(), tokenTypes);
138        }
139
140        /**
141         * Returns the index of the last token that has any of the given token types not
142         * before the start index (or {@link #NOT_FOUND}).
143         */
144        public static int lastTokenOfType(List<IToken> tokens, int startIndex, EnumSet<ETokenType> tokenTypes) {
145                return lastTokenOfType(tokens, startIndex, tokens.size(), tokenTypes);
146        }
147
148        /**
149         * Returns the index of the last token that has any of the given token types not
150         * before the start index and before the end index (or {@link #NOT_FOUND}).
151         */
152        public static int lastTokenOfType(List<IToken> tokens, int startIndex, int endIndex, ETokenType... tokenTypes) {
153                return lastTokenOfType(tokens, startIndex, endIndex, toEnumSet(tokenTypes));
154        }
155
156        /**
157         * Returns the index of the last token that has any of the given token types not
158         * before the start index and before the end index (or {@link #NOT_FOUND}).
159         */
160        public static int lastTokenOfType(List<IToken> tokens, int startIndex, int endIndex,
161                        EnumSet<ETokenType> tokenTypes) {
162                assertListRange(tokens, startIndex, endIndex);
163
164                for (int i = endIndex - 1; i >= startIndex; --i) {
165                        if (tokenTypes.contains(tokens.get(i).getType())) {
166                                return i;
167                        }
168                }
169                return NOT_FOUND;
170        }
171
172        /**
173         * Returns the starting index of a element - separator - element type token
174         * sequence ending at {@code endOffset} with type {@code element}. Returns
175         * {@link #NOT_FOUND} if no such sequence is found. Useful for example for
176         * matching qualified names (element=IDENTIFIER, separator=DOT)
177         */
178        public static int firstTokenOfAlternatingTypes(List<IToken> tokens, int endOffset, ETokenType element,
179                        ETokenType separator) {
180                Preconditions.checkArgument(endOffset >= 0 && endOffset < tokens.size());
181                if (tokens.get(endOffset).getType() != element) {
182                        return NOT_FOUND;
183                }
184                for (int i = endOffset; i >= 0; i -= 2) {
185                        if (i >= 1 && TokenStreamUtils.hasTokenTypeSequence(tokens, i - 1, separator, element)) {
186                                continue;
187                        } else if (tokens.get(i).getType() == element) {
188                                return i;
189                        }
190                        return NOT_FOUND;
191                }
192                return NOT_FOUND;
193        }
194
195        /**
196         * Asserts that the given start and end indices lie in between the list range
197         * and that the end index is not smaller than the start index.
198         */
199        private static void assertListRange(List<IToken> tokens, int startIndex, int endIndex) throws AssertionError {
200                CCSMAssert.isTrue(startIndex >= 0, "startIndex must be greater or equal to zero");
201                CCSMAssert.isTrue(endIndex <= tokens.size(), "endIndex must be less or equal to tokens.size()");
202        }
203
204        /**
205         * Returns <code>true</code> if the list of tokens contains a token of
206         * <code>tokenType<code>, false otherwise.
207         */
208        public static boolean contains(List<IToken> tokens, ETokenType tokenType) {
209                return contains(tokens, 0, tokens.size(), tokenType);
210        }
211
212        /**
213         * Returns <code>true</code> if the list of tokens contains a token of
214         * <code>tokenType<code> in the given range, false otherwise.
215         */
216        public static boolean contains(List<IToken> tokens, int startIndex, int endIndex, ETokenType tokenType) {
217                return firstTokenOfType(tokens, startIndex, endIndex, tokenType) != NOT_FOUND;
218        }
219
220        /**
221         * Returns <code>true</code> if the list of tokens contains a token of
222         * <code>tokenClass<code>, false otherwise.
223         */
224        public static boolean contains(List<IToken> tokens, ETokenClass tokenClass) {
225                for (IToken token : tokens) {
226                        if (token.getType().getTokenClass() == tokenClass) {
227                                return true;
228                        }
229                }
230                return false;
231        }
232
233        /**
234         * Returns <code>true</code> if the list of tokens contains a any token of given
235         * <code>tokenTypes<code>, false otherwise.
236         */
237        public static boolean containsAny(List<IToken> tokens, ETokenType... tokenTypes) {
238                return containsAny(tokens, 0, tokens.size(), tokenTypes);
239        }
240
241        /**
242         * Returns <code>true</code> if the given token stream contains one of the
243         * tokens in the given set.
244         */
245        public static boolean containsAny(List<IToken> tokens, EnumSet<ETokenType> tokenTypeSet) {
246                return containsAny(tokens, 0, tokens.size(), tokenTypeSet);
247        }
248
249        /**
250         * Returns <code>true</code> if the list of tokens contains a any token of given
251         * <code>tokenTypes<code> in the given range, false otherwise.
252         */
253        public static boolean containsAny(List<IToken> tokens, int startIndex, int endIndex, ETokenType... tokenTypes) {
254                return containsAny(tokens, startIndex, endIndex, toEnumSet(tokenTypes));
255        }
256
257        /**
258         * Returns <code>true</code> if the list of tokens contains a any token of given
259         * <code>tokenTypes<code> in the given range, false otherwise.
260         */
261        public static boolean containsAny(List<IToken> tokens, int startIndex, int endIndex,
262                        EnumSet<ETokenType> tokenTypes) {
263                assertListRange(tokens, startIndex, endIndex);
264                if (tokenTypes.isEmpty()) {
265                        return false;
266                }
267
268                for (int i = startIndex; i < endIndex; ++i) {
269                        if (tokenTypes.contains(tokens.get(i).getType())) {
270                                return true;
271                        }
272                }
273                return false;
274        }
275
276        /**
277         * Returns <code>true</code> if the list of tokens contains at least one token
278         * of each of the given <code>tokenTypes<code>, false otherwise.
279         */
280        public static boolean containsAll(List<IToken> tokens, ETokenType... tokenTypes) {
281                return containsAll(tokens, 0, tokens.size(), tokenTypes);
282        }
283
284        /**
285         * Returns <code>true</code> if the list of tokens contains at least one token
286         * of each of the given <code>tokenTypes<code> in the given range, false
287         * otherwise.
288         */
289        public static boolean containsAll(List<IToken> tokens, int startIndex, int endIndex, ETokenType... tokenTypes) {
290                assertListRange(tokens, startIndex, endIndex);
291
292                if (tokenTypes.length == 0) {
293                        return true;
294                }
295
296                EnumSet<ETokenType> types = toEnumSet(tokenTypes);
297                for (int i = startIndex; i < endIndex; ++i) {
298                        types.remove(tokens.get(i).getType());
299                        if (types.isEmpty()) {
300                                return true;
301                        }
302                }
303                return false;
304        }
305
306        /**
307         * Returns <code>true</code> if the list of tokens contains a consecutive
308         * subsequence of tokens that match the sequence of token types., false
309         * otherwise.
310         */
311        public static boolean containsSequence(List<IToken> tokens, int startIndex, int endIndex,
312                        ETokenType... tokenTypes) {
313                assertListRange(tokens, startIndex, endIndex);
314
315                OUTER: for (int i = startIndex; i <= endIndex - tokenTypes.length; ++i) {
316                        for (int j = 0; j < tokenTypes.length; ++j) {
317                                if (tokens.get(i + j).getType() != tokenTypes[j]) {
318                                        continue OUTER;
319                                }
320                        }
321                        return true;
322                }
323                return false;
324        }
325
326        /**
327         * Returns <code>true</code> if the list of tokens contains a consecutive
328         * subsequence of tokens that match the sequence of token types., false
329         * otherwise.
330         */
331        public static boolean containsSequence(List<IToken> tokens, ETokenType... tokenTypes) {
332                return containsSequence(tokens, 0, tokens.size(), tokenTypes);
333        }
334
335        /**
336         * Returns the sublist of tokens between the first occurrence of given start
337         * token type and the first occurrence of end token type (after start). If one
338         * of them is not found, an empty list is returned. The tokens for the start and
339         * end are not included in the returned sub list.
340         */
341        public static List<IToken> tokensBetween(List<IToken> tokens, ETokenType startType, ETokenType endType) {
342                return tokensBetween(tokens, EnumSet.of(startType), EnumSet.of(endType));
343        }
344
345        /**
346         * Returns the sublist of tokens between the first occurrence of some given
347         * start token types and the first occurrence of any of the given end token
348         * types (after start). If no matches are found, an empty list is returned. The
349         * tokens for the start and end are not included in the returned sub list.
350         */
351        public static List<IToken> tokensBetween(List<IToken> tokens, EnumSet<ETokenType> startTypes,
352                        EnumSet<ETokenType> endTypes) {
353                int start = TokenStreamUtils.firstTokenOfType(tokens, startTypes.toArray(new ETokenType[0]));
354                if (start == NOT_FOUND) {
355                        return CollectionUtils.emptyList();
356                }
357                start += 1;
358
359                int end = TokenStreamUtils.firstTokenOfType(tokens, start, endTypes.toArray(new ETokenType[0]));
360                if (end == NOT_FOUND) {
361                        return CollectionUtils.emptyList();
362                }
363
364                return tokens.subList(start, end);
365        }
366
367        /**
368         * Returns the offset of the first token of closingType, thereby counting
369         * nesting occurrences of openingType and closingType. Returns
370         * {@link #NOT_FOUND} if the token before currentOffset isn't equal to the
371         * openingType or if no token with closingType was found.
372         * 
373         * @param currentOffset
374         *            this is the offset to start the search and must be one token
375         *            <b>after</b> the opening token for which the closing token shall
376         *            be found.
377         */
378        public static int findMatchingClosingToken(List<IToken> tokens, int currentOffset, ETokenType openingType,
379                        ETokenType closingType) {
380                int nesting = 1;
381                for (; currentOffset < tokens.size(); ++currentOffset) {
382                        ETokenType tokenType = tokens.get(currentOffset).getType();
383                        if (tokenType == openingType) {
384                                nesting += 1;
385                        } else if (tokenType == closingType) {
386                                nesting -= 1;
387                                if (nesting == 0) {
388                                        return currentOffset;
389                                }
390                        }
391                }
392                return NOT_FOUND;
393        }
394
395        /**
396         * Returns the offset of the first token of closingType, thereby counting
397         * nesting occurrences of openingType and closingType. Returns
398         * {@link #NOT_FOUND} if none was found.
399         * 
400         * @param currentOffset
401         *            this is the offset to start the search and must be one token
402         *            <b>after</b> the opening token for which the closing token shall
403         *            be found.
404         */
405        public static int findMatchingClosingToken(List<IToken> tokens, int currentOffset, EnumSet<ETokenType> openingTypes,
406                        EnumSet<ETokenType> closingTypes) {
407                int nesting = 1;
408                for (; currentOffset < tokens.size(); ++currentOffset) {
409                        ETokenType tokenType = tokens.get(currentOffset).getType();
410                        if (openingTypes.contains(tokenType)) {
411                                nesting += 1;
412                        } else if (closingTypes.contains(tokenType)) {
413                                nesting -= 1;
414                                if (nesting == 0) {
415                                        return currentOffset;
416                                }
417                        }
418                }
419                return NOT_FOUND;
420        }
421
422        /**
423         * Returns the offset of the first token of openingType, thereby counting
424         * nesting occurrences of openingType and closingType. Returns
425         * {@link #NOT_FOUND} if none was found.
426         * 
427         * @param currentOffset
428         *            this is the offset to start the search and must be one token
429         *            <b>before</b> the closing token for which the opening token shall
430         *            be found.
431         */
432        public static int findMatchingOpeningToken(List<IToken> tokens, int currentOffset, ETokenType openingType,
433                        ETokenType closingType) {
434                int openBraces = 1;
435                for (; currentOffset >= 0; currentOffset--) {
436                        ETokenType currentTokenType = tokens.get(currentOffset).getType();
437                        if (currentTokenType.equals(openingType)) {
438                                openBraces--;
439                        } else if (currentTokenType.equals(closingType)) {
440                                openBraces++;
441                        }
442                        if (openBraces == 0) {
443                                return currentOffset;
444                        }
445                }
446                return NOT_FOUND;
447        }
448
449        /**
450         * Returns the index of the first top-level token that has the given search type
451         * regarding the nesting introduced by the opening and closing types. If no such
452         * token is found {@link #NOT_FOUND} is returned.
453         */
454        public static int findFirstTopLevel(List<IToken> tokens, ETokenType searchType, List<ETokenType> openingTypes,
455                        List<ETokenType> closingTypes) {
456                return findFirstTopLevel(tokens, EnumSet.of(searchType), openingTypes, closingTypes);
457        }
458
459        /**
460         * Returns the index of the first top-level token that has one of the given
461         * search types regarding the nesting introduced by the opening and closing
462         * types. If no such token is found {@link #NOT_FOUND} is returned.
463         */
464        public static int findFirstTopLevel(List<IToken> tokens, EnumSet<ETokenType> searchTypes,
465                        List<ETokenType> openingTypes, List<ETokenType> closingTypes) {
466                return findFirstTopLevel(tokens, 0, searchTypes, openingTypes, closingTypes);
467        }
468
469        /**
470         * Returns the index of the first top-level token starting at the given index
471         * that has one of the given search types regarding the nesting introduced by
472         * the opening and closing types. If no such token is found {@link #NOT_FOUND}
473         * is returned.
474         */
475        public static int findFirstTopLevel(List<IToken> tokens, int startIndex, EnumSet<ETokenType> searchTypes,
476                        List<ETokenType> openingTypes, List<ETokenType> closingTypes) {
477                return findFirstTopLevel(tokens, startIndex, token -> searchTypes.contains(token.getType()), openingTypes,
478                                closingTypes);
479        }
480
481        /**
482         * Returns the index of the first top-level token starting at the given index
483         * that matches the given token predicate regarding the nesting introduced by
484         * the opening and closing types. If no such token is found {@link #NOT_FOUND}
485         * is returned.
486         */
487        public static int findFirstTopLevel(List<IToken> tokens, int startIndex, Predicate<IToken> searchPredicate,
488                        List<ETokenType> openingTypes, List<ETokenType> closingTypes) {
489                CCSMAssert.isTrue(startIndex >= 0, "startIndex must be greater or equal to zero");
490
491                for (NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, startIndex, openingTypes,
492                                closingTypes); iterator.hasNext();) {
493                        IToken token = iterator.next();
494                        if (iterator.isTopLevel() && searchPredicate.test(token)) {
495                                return iterator.getCurrentIndex();
496                        }
497                }
498                return NOT_FOUND;
499        }
500
501        /**
502         * Returns the index of the first top-level token starting at the given index
503         * that matches the given token predicate regarding the nesting introduced by
504         * the opening and closing types. If no such token is found {@link #NOT_FOUND}
505         * is returned.
506         */
507        public static int findFirstTopLevelWithIndexPredicate(List<IToken> tokens, int startIndex,
508                        Predicate<Integer> searchPredicate, List<ETokenType> openingTypes, List<ETokenType> closingTypes) {
509                CCSMAssert.isTrue(startIndex >= 0, "startIndex must be greater or equal to zero");
510
511                for (NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, startIndex, openingTypes,
512                                closingTypes); iterator.hasNext();) {
513                        iterator.next();
514                        if (iterator.isTopLevel() && searchPredicate.test(iterator.getCurrentIndex())) {
515                                return iterator.getCurrentIndex();
516                        }
517                }
518                return NOT_FOUND;
519        }
520
521        /**
522         * Returns whether the next tokens starting at the given offset are of given
523         * type.
524         * 
525         * @deprecated use {@link #hasTokenTypeSequence(List, int, ETokenType...)}
526         *             instead. I leave the method here to avoid hidden merge conflicts.
527         *             Can be removed after some releases.
528         */
529        @Deprecated
530        public static boolean tokenTypesAt(List<IToken> tokens, int startOffset, ETokenType... tokenTypes) {
531                return hasTokenTypeSequence(tokens, startOffset, tokenTypes);
532        }
533
534        /**
535         * Returns whether the given token list starts with given token type sequence.
536         */
537        public static boolean startsWith(List<IToken> tokens, ETokenType... sequence) {
538                return hasTokenTypeSequence(tokens, 0, sequence);
539        }
540
541        /**
542         * Returns whether the given token list ends with given token type sequence.
543         */
544        public static boolean endsWith(List<IToken> tokens, ETokenType... sequence) {
545                return hasTokenTypeSequence(tokens, tokens.size() - sequence.length, sequence);
546        }
547
548        /** Counts the number of tokens of a given type in the list of tokens */
549        public static int count(List<IToken> tokens, ETokenType tokenType) {
550                int counter = 0;
551                for (IToken token : tokens) {
552                        if (token.getType().equals(tokenType)) {
553                                counter++;
554                        }
555                }
556                return counter;
557        }
558
559        /**
560         * Counts the number of tokens of a given type in the list of tokens that are
561         * not nested between the given nesting tokens.
562         */
563        public static int countWithoutNesting(List<IToken> tokens, ETokenType tokenType, ETokenType openingType,
564                        ETokenType closingType) {
565                return countWithoutNesting(tokens, tokenType, Arrays.asList(openingType), Arrays.asList(closingType));
566        }
567
568        /**
569         * Counts the number of tokens of a given type in the list of tokens that are
570         * not nested between the given nesting tokens.
571         */
572        public static int countWithoutNesting(List<IToken> tokens, ETokenType tokenType, List<ETokenType> openingType,
573                        List<ETokenType> closingType) {
574                int counter = 0;
575                for (NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, 0, openingType,
576                                closingType); iterator.hasNext();) {
577                        IToken token = iterator.next();
578                        if (iterator.isTopLevel() && token.getType().equals(tokenType)) {
579                                counter++;
580                        }
581                }
582                return counter;
583        }
584
585        /**
586         * Returns all tokens of the given token types in the given token list.
587         */
588        public static List<IToken> findAllTokens(List<IToken> tokens, EnumSet<ETokenType> types) {
589                List<IToken> result = new ArrayList<>();
590                for (Integer index : findAll(tokens, 0, tokens.size(), types)) {
591                        result.add(tokens.get(index));
592                }
593                return result;
594        }
595
596        /**
597         * Returns all indices of the given token types in the given token list.
598         */
599        public static List<Integer> findAll(List<IToken> tokens, EnumSet<ETokenType> types) {
600                return findAll(tokens, 0, tokens.size(), types);
601        }
602
603        /**
604         * Returns all indices of the given token types in the given token list,
605         * beginning from startOffset (inclusive) to endOffset (exclusive).
606         */
607        public static List<Integer> findAll(List<IToken> tokens, int startOffset, int endOffset,
608                        EnumSet<ETokenType> types) {
609                CCSMAssert.isTrue(startOffset >= 0, "startOffset must be greater or equal to zero");
610                CCSMAssert.isTrue(endOffset <= tokens.size(), "endOffset must be less or equal to tokens.size()");
611
612                List<Integer> indices = new ArrayList<>();
613                for (int i = startOffset; i < endOffset; i++) {
614                        if (types.contains(tokens.get(i).getType())) {
615                                indices.add(i);
616                        }
617                }
618                return indices;
619        }
620
621        /** Splits the given token stream at the tokens of the given types. */
622        public static List<List<IToken>> split(List<IToken> tokens, EnumSet<ETokenType> splitTypes) {
623                return split(tokens, splitTypes, Integer.MAX_VALUE);
624        }
625
626        /** Splits the given token stream at the tokens of the given types. */
627        public static List<List<IToken>> split(List<IToken> tokens, ETokenType... splitTypes) {
628                return split(tokens, toEnumSet(splitTypes), Integer.MAX_VALUE);
629        }
630
631        /**
632         * Splits the given token stream at the tokens of the given types, but at most
633         * as often as given in the limit parameter. The resulting list will have at
634         * most <code>limit</code> entries). The limit must be bigger than 0.
635         */
636        public static List<List<IToken>> split(List<IToken> tokens, EnumSet<ETokenType> splitTypes, int limit) {
637                Predicate<IToken> splitTokenMatcher = token -> splitTypes.contains(token.getType());
638                return split(tokens, splitTokenMatcher, limit);
639        }
640
641        /**
642         * Splits the given token stream at the tokens matching the given predicate, but
643         * at most as often as given in the limit parameter. The resulting list will
644         * have at most <code>limit</code> entries). The limit must be bigger than 0.
645         */
646        public static List<List<IToken>> split(List<IToken> tokens, Predicate<IToken> splitTokenMatcher, int limit)
647                        throws AssertionError {
648                CCSMAssert.isTrue(limit > 0, "The limit must be greater than zero");
649                List<List<IToken>> parts = new ArrayList<>();
650                int start = 0;
651
652                for (int i = 0; i < tokens.size(); i++) {
653                        if (parts.size() == limit - 1) {
654                                break;
655                        }
656                        if (splitTokenMatcher.test(tokens.get(i))) {
657                                List<IToken> part = tokens.subList(start, i);
658                                parts.add(part);
659                                start = i + 1;
660                        }
661                }
662                parts.add(tokens.subList(start, tokens.size()));
663                return parts;
664        }
665
666        /**
667         * Returns whether the given token list contains tokens with the given types in
668         * that order starting at the given start offset.
669         * 
670         * This method returns false when called with invalid offsets.
671         */
672        public static boolean hasTokenTypeSequence(List<IToken> tokens, int startOffset, ETokenType... sequence) {
673                if (startOffset + sequence.length > tokens.size() || startOffset < 0) {
674                        return false;
675                }
676
677                for (int i = 0; i < sequence.length; i++) {
678                        if (!tokens.get(i + startOffset).getType().equals(sequence[i])) {
679                                return false;
680                        }
681                }
682                return true;
683        }
684
685        /**
686         * Returns whether an index of the given token list matches the given
687         * {@link BiPredicate} (which gets the token index and the given token list).
688         */
689        public static boolean containsTokenIndexPredicate(List<IToken> tokens,
690                        BiPredicate<Integer, List<IToken>> predicate) {
691                return firstTokenMatchingIndexPredicate(tokens, 0, predicate) != NOT_FOUND;
692        }
693
694        /**
695         * Returns the first index of the given token list that matches the given
696         * {@link BiPredicate} (which gets the token index and the given token list).
697         * This method iterates over the indices starting with the given startIndex.
698         */
699        public static int firstTokenMatchingIndexPredicate(List<IToken> tokens, int startIndex,
700                        BiPredicate<Integer, List<IToken>> predicate) {
701                for (int i = startIndex; i < tokens.size(); i++) {
702                        if (predicate.test(i, tokens)) {
703                                return i;
704                        }
705                }
706                return NOT_FOUND;
707        }
708
709        /**
710         * Returns the index (between the given start and end indices) of the first
711         * token that starts a sequence of the given token types. If no such token is
712         * found {@link #NOT_FOUND} is returned.
713         */
714        public static int firstTokenOfTypeSequence(List<IToken> tokens, int startOffset, int endIndex,
715                        ETokenType... sequence) {
716                for (int i = startOffset; i < endIndex; i++) {
717                        if (hasTokenTypeSequence(tokens, i, sequence)) {
718                                return i;
719                        }
720                }
721                return NOT_FOUND;
722        }
723
724        /**
725         * Returns the indices of all tokens that starts a sequence of the given token
726         * types (and are between startOffset and endIndex). If no such token is found,
727         * an empty List is returned.
728         */
729        public static List<Integer> allStartingIndicesOfTypeSequence(List<IToken> tokens, int startOffset, int endIndex,
730                        ETokenType... sequence) {
731                List<Integer> startingIndices = new ArrayList<>();
732                for (int i = startOffset; i < endIndex; i++) {
733                        if (hasTokenTypeSequence(tokens, i, sequence)) {
734                                startingIndices.add(i);
735                        }
736                }
737                return startingIndices;
738        }
739
740        /**
741         * Returns the index of the first occurrence of the given sequence of token
742         * types in the given token list, beginning from the given start offset. If the
743         * sequence if not found, {@link #NOT_FOUND} is returned.
744         */
745        public static int firstTokenOfTypeSequence(List<IToken> tokens, int startOffset, ETokenType... sequence) {
746                return firstTokenOfTypeSequence(tokens, startOffset, tokens.size() - sequence.length + 1, sequence);
747        }
748
749        /**
750         * Returns all indices of occurrences of the given sequence of token types in
751         * the given token list, beginning from the given offset. If the sequence is not
752         * found, an empty list is returned.
753         */
754        public static List<Integer> firstTokenOfTypeSequences(List<IToken> tokens, int offset, ETokenType... sequence) {
755                List<Integer> indices = new ArrayList<>();
756
757                while (offset < tokens.size() - sequence.length + 1) {
758                        offset = firstTokenOfTypeSequence(tokens, offset, sequence);
759                        if (offset == NOT_FOUND) {
760                                break;
761                        }
762                        indices.add(offset);
763                        offset++;
764                }
765
766                return indices;
767        }
768
769        /**
770         * Returns whether the given list's token types equal the given token types.
771         */
772        public static boolean hasTypes(List<IToken> tokens, ETokenType... types) {
773                if (tokens.size() != types.length) {
774                        return false;
775                }
776
777                return hasTokenTypeSequence(tokens, 0, types);
778        }
779
780        /**
781         * Returns all tokens between the tokens of the opening and closing types
782         * regarding nesting.
783         */
784        public static List<IToken> tokensBetweenWithNesting(List<IToken> tokens, ETokenType openingType,
785                        ETokenType closingType) {
786                return tokensBetweenWithNesting(tokens, 0, openingType, closingType);
787        }
788
789        /**
790         * Returns all tokens between the tokens of the opening and closing types
791         * regarding nesting beginning from the given start offset.
792         */
793        public static List<IToken> tokensBetweenWithNesting(List<IToken> tokens, int startOffset, ETokenType openingType,
794                        ETokenType closingType) {
795                int openingIndex = firstTokenOfType(tokens, startOffset, openingType);
796                if (openingIndex == NOT_FOUND) {
797                        return CollectionUtils.emptyList();
798                }
799
800                openingIndex += 1;
801                int closingIndex = findMatchingClosingToken(tokens, openingIndex, openingType, closingType);
802                if (closingIndex == NOT_FOUND) {
803                        return CollectionUtils.emptyList();
804                }
805
806                return tokens.subList(openingIndex, closingIndex);
807        }
808
809        /**
810         * Splits the given token list at the tokens of the given split type. If the
811         * split type is nested between the open and close tokens, it is ignored.
812         */
813        public static List<List<IToken>> splitWithNesting(List<IToken> tokens, ETokenType splitType, ETokenType openingType,
814                        ETokenType closingType) {
815                return splitWithNesting(tokens, splitType, Arrays.asList(openingType), Arrays.asList(closingType));
816        }
817
818        /**
819         * Splits the given token list at the tokens of the given split type. If the
820         * split type is nested between one of the open and close tokens, it is ignored.
821         * The open and close token type lists must have the same size. None of both
822         * must contain duplicates.
823         */
824        public static List<List<IToken>> splitWithNesting(List<IToken> tokens, ETokenType splitType,
825                        List<ETokenType> openingTypes, List<ETokenType> closingTypes) {
826                return splitWithNesting(tokens, EnumSet.of(splitType), openingTypes, closingTypes, Integer.MAX_VALUE);
827        }
828
829        /**
830         * Splits the given token list at the tokens of the given split types. If one of
831         * the split types is nested between one of the open and close tokens, it is
832         * ignored. The resulting list will have at most limit entries. Limit must be
833         * greater zero. The open and close token type lists must have the same size.
834         * None of both must contain duplicates.
835         */
836        public static List<List<IToken>> splitWithNesting(List<IToken> tokens, EnumSet<ETokenType> splitTypes,
837                        List<ETokenType> openingTypes, List<ETokenType> closingTypes, int limit) {
838                CCSMAssert.isTrue(limit > 0, "The limit must be greater than zero");
839
840                List<List<IToken>> parts = new ArrayList<>();
841                int start = 0;
842                for (NestingAwareTokenIterator iterator = new NestingAwareTokenIterator(tokens, 0, openingTypes,
843                                closingTypes); iterator.hasNext();) {
844                        IToken token = iterator.next();
845                        if (parts.size() == limit - 1) {
846                                break;
847                        }
848                        if (iterator.isTopLevel() && splitTypes.contains(token.getType())) {
849                                List<IToken> part = tokens.subList(start, iterator.getCurrentIndex());
850                                parts.add(part);
851                                start = iterator.getCurrentIndex() + 1;
852                        }
853                }
854
855                parts.add(tokens.subList(start, tokens.size()));
856                return parts;
857        }
858
859        /**
860         * Creates a new token from the given referenceToken.
861         * 
862         * @param referenceToken
863         *            Reference token, whose offset, line number and origin id will be
864         *            copied to the new token.
865         * @param text
866         *            Text of the newly created token.
867         * @param type
868         *            Type of the newly created token.
869         * @return New token with the given text and type.
870         */
871        public static IToken createToken(IToken referenceToken, String text, ETokenType type) {
872                return referenceToken.newToken(type, referenceToken.getOffset(), referenceToken.getLineNumber(), text,
873                                referenceToken.getOriginId());
874        }
875
876        /**
877         * Creates a new token list copying the given list and using the given reference
878         * token.
879         * 
880         * @param referenceToken
881         *            Reference token, whose offset, line number and origin id will be
882         *            copied to the new tokens.
883         * @param tokens
884         *            List of tokens to be duplicated.
885         */
886        public static List<IToken> copyTokens(IToken referenceToken, List<IToken> tokens) {
887                return CollectionUtils.map(tokens,
888                                token -> TokenStreamUtils.createToken(referenceToken, token.getText(), token.getType()));
889        }
890
891        /**
892         * Adds a new token to the given token list at the given position with the given
893         * type and text. The token that is at that position currently is taken as a
894         * reference. The new token will have the same offset, origin ID and line number
895         * as the reference token.
896         * 
897         * @param tokens
898         *            The token list to modify. Must not be unmodifiable or empty.
899         * @param position
900         *            The position at which to insert the new token. Also the position
901         *            of the reference token. If this is equal to the size of the token
902         *            list, the new token will be appended at the end and the last token
903         *            will be taken as the reference token.
904         * @param type
905         *            The token type of the new token.
906         * @param text
907         *            The text of the new token.
908         */
909        public static void addToken(List<IToken> tokens, int position, ETokenType type, String text) {
910                CCSMAssert.isFalse(tokens.isEmpty(),
911                                "Cannot create new tokens without at least one existing token as a reference");
912                int referencePosition = position;
913                if (position == tokens.size()) {
914                        referencePosition--;
915                }
916                IToken referenceToken = tokens.get(referencePosition);
917                IToken newToken = createToken(referenceToken, text, type);
918                tokens.add(position, newToken);
919        }
920
921        /**
922         * Removes the last token of the given token list if it matches the specified
923         * token type.
924         * 
925         * @param tokens
926         *            The token list to modify.
927         * @param type
928         *            Token type, which should be removed.
929         * @return The token list without the given token type at the end.
930         */
931        public static List<IToken> removeAtEnd(List<IToken> tokens, ETokenType type) {
932                if (endsWith(tokens, type)) {
933                        tokens = tokens.subList(0, tokens.size() - 1);
934                }
935                return tokens;
936        }
937
938        /**
939         * Removes the first token of the given token list if it matches the specified
940         * token type.
941         * 
942         * @param tokens
943         *            The token list to modify.
944         * @param type
945         *            Token type, which should be removed.
946         * @return The token list without the given token type at the beginning.
947         */
948        public static List<IToken> removeAtFront(List<IToken> tokens, ETokenType type) {
949                if (startsWith(tokens, type)) {
950                        tokens = CollectionUtils.getRest(tokens);
951                }
952                return tokens;
953        }
954
955        /**
956         * Searches for the first token with the given type and text (ignoring case).
957         * 
958         * @param tokens
959         *            The list of tokens to be searched.
960         * @param tokenText
961         *            The searched token should have this text (ignoring case).
962         * @param tokenTypes
963         *            The searched token should have one of these types.
964         * @return The first token that matches, null if none is found.
965         */
966        public static IToken getTokenByTypeAndText(List<IToken> tokens, String tokenText, Set<ETokenType> tokenTypes) {
967                for (IToken token : tokens) {
968                        if (tokenTypes.contains(token.getType()) && token.getText().equalsIgnoreCase(tokenText)) {
969                                return token;
970                        }
971                }
972                return null;
973        }
974
975        /**
976         * Filters the tokens by type and returns the text of each filtered token.
977         * 
978         * @param tokens
979         *            The list of tokens to be searched.
980         * @param tokenType
981         *            The type for which filtering is performed.
982         * @return A list containing the texts of all tokens of the passed type. Empty
983         *         list if no token was found.
984         */
985        public static List<String> getTokenTextsByType(List<IToken> tokens, ETokenType tokenType) {
986                List<String> result = new ArrayList<>();
987                for (IToken token : tokens) {
988                        if (token.getType() == tokenType) {
989                                result.add(token.getText());
990                        }
991                }
992                return result;
993        }
994
995        /**
996         * Binary search for an {@link IToken} in the given list that has exactly the
997         * given offset ({@link IToken#getOffset()}). Assumes that the tokens in the
998         * given list are ordered by offset. Returns the index of the found token in the
999         * given list or {@value #NOT_FOUND} if no such token is found.
1000         * 
1001         * This also works on token lists where offsets are not *strictly* increasing
1002         * (e.g., due to preprocessor expansions). In this case, returns the index of
1003         * the first token that has the given offset.
1004         * 
1005         * @param tokens
1006         *            The list of tokens in which to search
1007         * @param offset
1008         *            The offset for which the token is to find
1009         * @return index of the token in the list or {@value #NOT_FOUND}
1010         */
1011        public static int indexOfByOffset(List<IToken> tokens, int offset) {
1012                int leftSearchBoundary = 0;
1013                int rightSearchBoundary = tokens.size() - 1;
1014                int matchingIndex = NOT_FOUND;
1015                while (leftSearchBoundary <= rightSearchBoundary) {
1016                        int currentIndex = (leftSearchBoundary + rightSearchBoundary) / 2;
1017                        if (tokens.get(currentIndex).getOffset() < offset) {
1018                                leftSearchBoundary = currentIndex + 1;
1019                        } else if (tokens.get(currentIndex).getOffset() > offset) {
1020                                rightSearchBoundary = currentIndex - 1;
1021                        } else {
1022                                matchingIndex = currentIndex;
1023                                break;
1024                        }
1025                }
1026                if (matchingIndex == NOT_FOUND) {
1027                        return NOT_FOUND;
1028                }
1029                // backtrack to first index that matches the offset (might be more than one
1030                // consecutive token in presence of preprocessor expansions).
1031                while (matchingIndex > 0 && tokens.get(matchingIndex - 1).getOffset() == offset) {
1032                        matchingIndex--;
1033                }
1034                return matchingIndex;
1035        }
1036
1037        /**
1038         * Returns the indices of the tokens in the given list that satisfy the given
1039         * predicate.
1040         */
1041        public static List<Integer> indicesOfByPredicate(List<IToken> tokens, Predicate<IToken> includeToken) {
1042                List<Integer> indices = new ArrayList<>();
1043                for (int i = 0; i < tokens.size(); i++) {
1044                        if (includeToken.test(tokens.get(i))) {
1045                                indices.add(i);
1046                        }
1047                }
1048                return indices;
1049        }
1050
1051        /**
1052         * Returns the {@link ELanguage} of the given tokens (assuming all tokens have
1053         * the same language). Returns <code>null</code> if the given collection is
1054         * empty.
1055         */
1056        public static ELanguage getLanguage(Collection<IToken> tokens) {
1057                if (tokens.isEmpty()) {
1058                        return null;
1059                }
1060                return CollectionUtils.getAny(tokens).getLanguage();
1061        }
1062
1063        /**
1064         * Returns a list of tokens existing in the same line of a given token, which
1065         * specified by its index.
1066         */
1067        public static List<IToken> getTokensInSameLine(List<IToken> tokens, int index) {
1068                int tokenLineNumber = tokens.get(index).getLineNumber();
1069                int leftIndex = index, rightIndex = index;
1070                int size = tokens.size() - 1;
1071                while (leftIndex > 0 && tokens.get(leftIndex - 1).getLineNumber() == tokenLineNumber) {
1072                        leftIndex--;
1073                }
1074                while (rightIndex < size && tokens.get(rightIndex + 1).getLineNumber() == tokenLineNumber) {
1075                        rightIndex++;
1076                }
1077                return tokens.subList(leftIndex, rightIndex + 1);
1078        }
1079
1080        /**
1081         * Checks if the token is the start of a new statement.
1082         *
1083         * @param token
1084         *            The current token.
1085         * @param lastToken
1086         *            The previous token.
1087         * @param continueTokens
1088         *            Token types that force a statement to continue even in a new line.
1089         * @param nonContinuationTokens
1090         *            Token types of class OPERATOR that do not force a statement
1091         *            continuation.
1092         * @param newStatementToken
1093         *            Token that indicates the start of a new statement.
1094         * @return True if it is the start of a new statement otherwise false.
1095         */
1096        public static boolean startsNewStatement(IToken token, IToken lastToken, EnumSet<ETokenType> continueTokens,
1097                        EnumSet<ETokenType> nonContinuationTokens, ETokenType newStatementToken) {
1098
1099                if (lastToken == null) {
1100                        return false;
1101                }
1102
1103                if (token == null) {
1104                        return true;
1105                }
1106
1107                ETokenType tokenType = token.getType();
1108                ETokenType lastTokenType = lastToken.getType();
1109
1110                if (tokenType == newStatementToken) {
1111                        return true;
1112                }
1113
1114                // E.g.
1115                // a++
1116                // b
1117                // should be treated as two statements.
1118                // a &&
1119                // b
1120                // should be treated as one statement.
1121
1122                if (!nonContinuationTokens.contains(lastTokenType) && lastTokenType.getTokenClass() == ETokenClass.OPERATOR) {
1123                        return false;
1124                }
1125
1126                if (continueTokens.contains(tokenType) || continueTokens.contains(lastTokenType)) {
1127                        return false;
1128                }
1129
1130                /*
1131                 * If there is no reason to continue the statement in the next line, end the
1132                 * statement once the new token is on a new line.
1133                 */
1134                return lastToken.getLineNumber() < token.getLineNumber();
1135        }
1136
1137}