Source code

001/*-----------------------------------------------------------------------+
002 | org.conqat.engine.index.incubator
003 |                                                                       |
004   $Id$            
005 |                                                                       |
006 | Copyright (c)  2009-2013 CQSE GmbH                                 |
007 +-----------------------------------------------------------------------*/
008package eu.cqse.check.framework.util.tokens;
009
010import java.util.ArrayList;
011import java.util.Collections;
012import java.util.EnumSet;
013import java.util.List;
014import java.util.Optional;
015import java.util.Set;
016import java.util.function.Function;
017
018import org.conqat.lib.commons.assertion.CCSMAssert;
019import org.conqat.lib.commons.collections.Pair;
020
021import eu.cqse.check.framework.preprocessor.c.IncludedTokens;
022import eu.cqse.check.framework.scanner.ELanguage;
023import eu.cqse.check.framework.scanner.ETokenType;
024import eu.cqse.check.framework.scanner.IToken;
025import eu.cqse.check.framework.shallowparser.TokenStreamUtils;
026
027/**
028 * Allows parsing a token stream from left to right. Parser can be reset to a
029 * given position.
030 */
031public class TokenStreamParser {
032
033        /** The complete original token stream. */
034        private final List<IToken> tokens;
035
036        /** The current position of the parser. */
037        private int currentToken = 0;
038
039        /**
040         * Constructor.
041         * 
042         * @param tokens
043         *            The tokens to parse.
044         * @param filteredTokens
045         *            A set of token types. All tokens of these types are filtered out
046         *            before parsing, i.e. they are simply skipped.
047         */
048        public TokenStreamParser(List<IToken> tokens, Set<ETokenType> filteredTokens) {
049                this(filter(tokens, filteredTokens));
050        }
051
052        /** Constructor. */
053        public TokenStreamParser(List<IToken> tokens) {
054                this.tokens = tokens;
055        }
056
057        /** Constructor. */
058        public TokenStreamParser(List<IToken> tokens, int startPosition) {
059                assertPositionAllowed(startPosition, tokens);
060                this.tokens = tokens;
061                this.currentToken = startPosition;
062
063        }
064
065        /**
066         * Returns a filtered view of the given token list that does not contain any
067         * tokens of the given types.
068         */
069        private static List<IToken> filter(List<IToken> tokens, Set<ETokenType> filteredTokens) {
070                List<IToken> filtered = new ArrayList<>();
071                for (IToken token : tokens) {
072                        if (!filteredTokens.contains(token.getType())) {
073                                filtered.add(token);
074                        }
075                }
076                return filtered;
077        }
078
079        /**
080         * Consumes all sequential tokens of the given types and returns the consumed
081         * tokens.
082         */
083        public List<IToken> consumeAnyOf(Set<ETokenType> matchTypes) {
084                int start = currentToken;
085                while (isAnyOf(matchTypes)) {
086                        currentToken++;
087                }
088                return tokens.subList(start, currentToken);
089        }
090
091        /**
092         * Consumes one token of the given type and returns the token in an
093         * {@link Optional} or <code>Optional.empty()</code> if the current token is not
094         * of any of those types. This method only advances the parser if the current
095         * token has one of the given types.
096         */
097        public Optional<IToken> consumeOneOrZeroOf(Set<ETokenType> types) {
098                if (isAnyOf(types)) {
099                        if (isDone()) {
100                                return Optional.empty();
101                        }
102                        IToken token = tokens.get(currentToken);
103                        currentToken++;
104                        return Optional.of(token);
105                }
106                return Optional.empty();
107        }
108
109        /**
110         * Consumes one token of the given type and returns the token in an
111         * {@link Optional} or <code>Optional.empty()</code> if the current token is not
112         * of any of those types. This method only advances the parser if the current
113         * token has one of the given types.
114         */
115        public Optional<IToken> consumeOneOrZeroOf(ETokenType type) {
116                return consumeOneOrZeroOf(EnumSet.of(type));
117        }
118
119        /**
120         * Consumes all sequential tokens that are not of the given types and returns
121         * their texts.
122         */
123        public List<String> consumeAnyExcept(Set<ETokenType> types) {
124                List<String> texts = new ArrayList<>();
125                while (!isAnyOf(types)) {
126                        if (currentType() == null) {
127                                break;
128                        }
129                        texts.add(currentText());
130                        currentToken++;
131                }
132                return texts;
133        }
134
135        /**
136         * Skips to the first occurrence of one of the given {@link ETokenType}s,
137         * ignoring tokens nested in pairs of the given opening/closing
138         * {@link ETokenType}s.
139         * 
140         * If no top-level occurrence of one of the search types is found, the parser
141         * position is left unchanged and <code>false</code> is returned. If an
142         * occurrence of one of the search types is found, the parser is left pointing
143         * at it and <code>true</code> is returned.
144         * 
145         * @return <code>true</code> if the skipping succeeded and <code>false</code> if
146         *         no top-level occurrence of the searchTypes could be found.
147         */
148        public boolean skipToFirstTopLevel(EnumSet<ETokenType> searchTypes, List<ETokenType> openingTypes,
149                        List<ETokenType> closingTypes) {
150                int firstTopLevelOccurrence = TokenStreamUtils.findFirstTopLevel(tokens, currentToken, searchTypes,
151                                openingTypes, closingTypes);
152                if (firstTopLevelOccurrence == TokenStreamUtils.NOT_FOUND) {
153                        return false;
154                }
155                currentToken = firstTopLevelOccurrence;
156                return true;
157        }
158
159        /**
160         * Skips balanced braces as long as they only contain the allowed inner types.
161         * Checks that the current token is of the opening type.
162         * 
163         * Skipping may be interrupted prematurely if unexpected tokens are encountered.
164         * In this case, the parser is left pointing to the bad token.
165         * 
166         * @return Returns a Pair. The first element states whether this method
167         *         succeeded (<code>true</code> if did not encounter a disallowed inner
168         *         type). The second element is the list of consumed {@link IToken}s.
169         */
170        public Pair<Boolean, List<IToken>> skipBalanced(ETokenType openingType, ETokenType closingType,
171                        Set<ETokenType> allowedInnerTypes) {
172                CCSMAssert.isFalse(allowedInnerTypes.contains(openingType),
173                                "The given inner token types contain the opening token type");
174                CCSMAssert.isFalse(allowedInnerTypes.contains(closingType),
175                                "The given inner token types contain the closing token type");
176                if (currentType() != openingType) {
177                        return Pair.createPair(false, Collections.emptyList());
178                }
179                ArrayList<IToken> consumedTokens = new ArrayList<>();
180                consumeOneOrZeroOf(EnumSet.of(openingType)).ifPresent(consumedTokens::add);
181
182                int openCount = 1;
183                while (openCount > 0) {
184                        consumedTokens.addAll(consumeAnyOf(allowedInnerTypes));
185                        Optional<IToken> consumed;
186                        if ((consumed = consumeOneOrZeroOf(EnumSet.of(openingType))).isPresent()) {
187                                openCount++;
188                                consumedTokens.add(consumed.get());
189                        } else if ((consumed = consumeOneOrZeroOf(EnumSet.of(closingType))).isPresent()) {
190                                openCount--;
191                                consumedTokens.add(consumed.get());
192                        } else {
193                                return Pair.createPair(false, consumedTokens);
194                        }
195                }
196                return Pair.createPair(true, consumedTokens);
197        }
198
199        /**
200         * Syntactic sugar for skipping balanced LPAREN and RPAREN. Wraps
201         * {@link #skipBalanced(ETokenType, ETokenType, Set)} with
202         * {@link ETokenType#LPAREN} and {@link ETokenType#RPAREN}.
203         */
204        public Pair<Boolean, List<IToken>> skipBalancedParentheses() {
205                return skipBalanced(ETokenType.LPAREN, ETokenType.RPAREN,
206                                EnumSet.complementOf(EnumSet.of(ETokenType.LPAREN, ETokenType.RPAREN)));
207        }
208
209        /** Consumes tokens as long as they alternate between the two type sets. */
210        public List<IToken> consumeAlternating(Set<ETokenType> typeSet1, Set<ETokenType> typeSet2) {
211                List<IToken> list = new ArrayList<>();
212                while (true) {
213                        Optional<IToken> token1 = consumeOneOrZeroOf(typeSet1);
214                        if (!token1.isPresent()) {
215                                break;
216                        }
217                        list.add(token1.get());
218                        Optional<IToken> token2 = consumeOneOrZeroOf(typeSet2);
219                        if (!token2.isPresent()) {
220                                break;
221                        }
222                        list.add(token2.get());
223                }
224                return list;
225        }
226
227        /**
228         * Returns <code>true</code> if the current token has any of the given types.
229         */
230        public boolean isAnyOf(Set<ETokenType> types) {
231                return types.contains(currentType());
232        }
233
234        /**
235         * Returns <code>true</code> if all tokens have been consumed and parsing is
236         * done.
237         */
238        public boolean isDone() {
239                return currentToken >= tokens.size();
240        }
241
242        /**
243         * Returns the text of the current token or <code>null</code> if all tokens have
244         * been consumed.
245         */
246        public String currentText() {
247                if (isDone()) {
248                        return null;
249                }
250                return tokens.get(currentToken).getText();
251        }
252
253        /**
254         * Returns the type of the current token or <code>null</code> if all tokens have
255         * been consumed.
256         */
257        public ETokenType currentType() {
258                if (isDone()) {
259                        return null;
260                }
261                return tokens.get(currentToken).getType();
262        }
263
264        /** Returns whether the next tokens correspond to the given sequence */
265        public boolean continuesWithSequence(ETokenType... tokenTypes) {
266                return !isDone() && TokenStreamUtils.containsSequence(tokens, currentToken, tokens.size() - 1, tokenTypes);
267        }
268
269        /**
270         * Skips to the beginning of the specified sequence or to the end if the
271         * sequence was not found
272         */
273        public void skipToSequence(ETokenType... tokenTypes) {
274                int index = TokenStreamUtils.firstTokenOfTypeSequence(tokens, currentToken, tokenTypes);
275                if (index != TokenStreamUtils.NOT_FOUND) {
276                        currentToken = index;
277                } else {
278                        skipToEnd();
279                }
280        }
281
282        /**
283         * Returns the number of tokens consumed so far.
284         */
285        public int getConsumedTokenCount() {
286                return currentToken;
287        }
288
289        /**
290         * Reset parser position to the given token index. Current position is
291         * {@link #getConsumedTokenCount()}.
292         */
293        public void resetToPosition(int newPosition) {
294                assertPositionAllowed(newPosition, tokens);
295                currentToken = newPosition;
296        }
297
298        /**
299         * Asserts that the given index is contained in the list.
300         */
301        private static void assertPositionAllowed(int index, List<IToken> tokens) throws AssertionError {
302                CCSMAssert.isTrue(index >= 0 && index <= tokens.size(),
303                                "The new position is out of range (position: " + index + " range: 0--" + (tokens.size() - 1));
304        }
305
306        /** {@inheritDoc} */
307        @Override
308        public String toString() {
309                return "TokenStreamParser " + tokens.subList(currentToken, tokens.size());
310        }
311
312        /** @see #tokens */
313        public List<IToken> getTokens() {
314                return tokens;
315        }
316
317        /**
318         * Tries to apply the given token consumer to this {@link TokenStreamParser}. If
319         * the consumer returns {@link Optional#empty()}, resets the parser to the
320         * current position and returns {@link Optional#empty()}. Otherwise, returns the
321         * return value of the consumer.
322         */
323        public <T> Optional<T> tryConsume(Function<TokenStreamParser, Optional<T>> consumer) {
324                int parserPositionBefore = getConsumedTokenCount();
325                Optional<T> result = consumer.apply(this);
326                if (!result.isPresent()) {
327                        resetToPosition(parserPositionBefore);
328                }
329                return result;
330        }
331
332        /**
333         * Returns the language of the given {@link IncludedTokens} (assuming all have
334         * the same language).
335         */
336        public ELanguage getTokenLanguage() {
337                return TokenStreamUtils.getLanguage(tokens);
338        }
339
340        /**
341         * Skips any remaining tokens. The next call of {@link #isDone()} will return
342         * <code>true</code>.
343         */
344        public void skipToEnd() {
345                currentToken = tokens.size();
346        }
347
348        /** Returns the current token or null iff {@link #isDone()}. */
349        public IToken currentToken() {
350                if (isDone()) {
351                        return null;
352                }
353                return tokens.get(currentToken);
354        }
355
356        /**
357         * Returns whether the token before the current parsing position has the given
358         * type.
359         */
360        public boolean previousTokenTypeIs(ETokenType type) {
361                if (currentToken == 0) {
362                        return false;
363                }
364                return tokens.get(currentToken - 1).getType() == type;
365        }
366}