001/*-----------------------------------------------------------------------+ 002 | org.conqat.engine.index.incubator 003 | | 004 $Id$ 005 | | 006 | Copyright (c) 2009-2013 CQSE GmbH | 007 +-----------------------------------------------------------------------*/ 008package eu.cqse.check.framework.util.tokens; 009 010import java.util.ArrayList; 011import java.util.Collections; 012import java.util.EnumSet; 013import java.util.List; 014import java.util.Optional; 015import java.util.Set; 016import java.util.function.Function; 017 018import org.conqat.lib.commons.assertion.CCSMAssert; 019import org.conqat.lib.commons.collections.Pair; 020 021import eu.cqse.check.framework.preprocessor.c.IncludedTokens; 022import eu.cqse.check.framework.scanner.ELanguage; 023import eu.cqse.check.framework.scanner.ETokenType; 024import eu.cqse.check.framework.scanner.IToken; 025import eu.cqse.check.framework.shallowparser.TokenStreamUtils; 026 027/** 028 * Allows parsing a token stream from left to right. Parser can be reset to a 029 * given position. 030 */ 031public class TokenStreamParser { 032 033 /** The complete original token stream. */ 034 private final List<IToken> tokens; 035 036 /** The current position of the parser. */ 037 private int currentToken = 0; 038 039 /** 040 * Constructor. 041 * 042 * @param tokens 043 * The tokens to parse. 044 * @param filteredTokens 045 * A set of token types. All tokens of these types are filtered out 046 * before parsing, i.e. they are simply skipped. 047 */ 048 public TokenStreamParser(List<IToken> tokens, Set<ETokenType> filteredTokens) { 049 this(filter(tokens, filteredTokens)); 050 } 051 052 /** Constructor. */ 053 public TokenStreamParser(List<IToken> tokens) { 054 this.tokens = tokens; 055 } 056 057 /** Constructor. */ 058 public TokenStreamParser(List<IToken> tokens, int startPosition) { 059 assertPositionAllowed(startPosition, tokens); 060 this.tokens = tokens; 061 this.currentToken = startPosition; 062 063 } 064 065 /** 066 * Returns a filtered view of the given token list that does not contain any 067 * tokens of the given types. 068 */ 069 private static List<IToken> filter(List<IToken> tokens, Set<ETokenType> filteredTokens) { 070 List<IToken> filtered = new ArrayList<>(); 071 for (IToken token : tokens) { 072 if (!filteredTokens.contains(token.getType())) { 073 filtered.add(token); 074 } 075 } 076 return filtered; 077 } 078 079 /** 080 * Consumes all sequential tokens of the given types and returns the consumed 081 * tokens. 082 */ 083 public List<IToken> consumeAnyOf(Set<ETokenType> matchTypes) { 084 int start = currentToken; 085 while (isAnyOf(matchTypes)) { 086 currentToken++; 087 } 088 return tokens.subList(start, currentToken); 089 } 090 091 /** 092 * Consumes one token of the given type and returns the token in an 093 * {@link Optional} or <code>Optional.empty()</code> if the current token is not 094 * of any of those types. This method only advances the parser if the current 095 * token has one of the given types. 096 */ 097 public Optional<IToken> consumeOneOrZeroOf(Set<ETokenType> types) { 098 if (isAnyOf(types)) { 099 if (isDone()) { 100 return Optional.empty(); 101 } 102 IToken token = tokens.get(currentToken); 103 currentToken++; 104 return Optional.of(token); 105 } 106 return Optional.empty(); 107 } 108 109 /** 110 * Consumes one token of the given type and returns the token in an 111 * {@link Optional} or <code>Optional.empty()</code> if the current token is not 112 * of any of those types. This method only advances the parser if the current 113 * token has one of the given types. 114 */ 115 public Optional<IToken> consumeOneOrZeroOf(ETokenType type) { 116 return consumeOneOrZeroOf(EnumSet.of(type)); 117 } 118 119 /** 120 * Consumes all sequential tokens that are not of the given types and returns 121 * their texts. 122 */ 123 public List<String> consumeAnyExcept(Set<ETokenType> types) { 124 List<String> texts = new ArrayList<>(); 125 while (!isAnyOf(types)) { 126 if (currentType() == null) { 127 break; 128 } 129 texts.add(currentText()); 130 currentToken++; 131 } 132 return texts; 133 } 134 135 /** 136 * Skips to the first occurrence of one of the given {@link ETokenType}s, 137 * ignoring tokens nested in pairs of the given opening/closing 138 * {@link ETokenType}s. 139 * 140 * If no top-level occurrence of one of the search types is found, the parser 141 * position is left unchanged and <code>false</code> is returned. If an 142 * occurrence of one of the search types is found, the parser is left pointing 143 * at it and <code>true</code> is returned. 144 * 145 * @return <code>true</code> if the skipping succeeded and <code>false</code> if 146 * no top-level occurrence of the searchTypes could be found. 147 */ 148 public boolean skipToFirstTopLevel(EnumSet<ETokenType> searchTypes, List<ETokenType> openingTypes, 149 List<ETokenType> closingTypes) { 150 int firstTopLevelOccurrence = TokenStreamUtils.findFirstTopLevel(tokens, currentToken, searchTypes, 151 openingTypes, closingTypes); 152 if (firstTopLevelOccurrence == TokenStreamUtils.NOT_FOUND) { 153 return false; 154 } 155 currentToken = firstTopLevelOccurrence; 156 return true; 157 } 158 159 /** 160 * Skips balanced braces as long as they only contain the allowed inner types. 161 * Checks that the current token is of the opening type. 162 * 163 * Skipping may be interrupted prematurely if unexpected tokens are encountered. 164 * In this case, the parser is left pointing to the bad token. 165 * 166 * @return Returns a Pair. The first element states whether this method 167 * succeeded (<code>true</code> if did not encounter a disallowed inner 168 * type). The second element is the list of consumed {@link IToken}s. 169 */ 170 public Pair<Boolean, List<IToken>> skipBalanced(ETokenType openingType, ETokenType closingType, 171 Set<ETokenType> allowedInnerTypes) { 172 CCSMAssert.isFalse(allowedInnerTypes.contains(openingType), 173 "The given inner token types contain the opening token type"); 174 CCSMAssert.isFalse(allowedInnerTypes.contains(closingType), 175 "The given inner token types contain the closing token type"); 176 if (currentType() != openingType) { 177 return Pair.createPair(false, Collections.emptyList()); 178 } 179 ArrayList<IToken> consumedTokens = new ArrayList<>(); 180 consumeOneOrZeroOf(EnumSet.of(openingType)).ifPresent(consumedTokens::add); 181 182 int openCount = 1; 183 while (openCount > 0) { 184 consumedTokens.addAll(consumeAnyOf(allowedInnerTypes)); 185 Optional<IToken> consumed; 186 if ((consumed = consumeOneOrZeroOf(EnumSet.of(openingType))).isPresent()) { 187 openCount++; 188 consumedTokens.add(consumed.get()); 189 } else if ((consumed = consumeOneOrZeroOf(EnumSet.of(closingType))).isPresent()) { 190 openCount--; 191 consumedTokens.add(consumed.get()); 192 } else { 193 return Pair.createPair(false, consumedTokens); 194 } 195 } 196 return Pair.createPair(true, consumedTokens); 197 } 198 199 /** 200 * Syntactic sugar for skipping balanced LPAREN and RPAREN. Wraps 201 * {@link #skipBalanced(ETokenType, ETokenType, Set)} with 202 * {@link ETokenType#LPAREN} and {@link ETokenType#RPAREN}. 203 */ 204 public Pair<Boolean, List<IToken>> skipBalancedParentheses() { 205 return skipBalanced(ETokenType.LPAREN, ETokenType.RPAREN, 206 EnumSet.complementOf(EnumSet.of(ETokenType.LPAREN, ETokenType.RPAREN))); 207 } 208 209 /** Consumes tokens as long as they alternate between the two type sets. */ 210 public List<IToken> consumeAlternating(Set<ETokenType> typeSet1, Set<ETokenType> typeSet2) { 211 List<IToken> list = new ArrayList<>(); 212 while (true) { 213 Optional<IToken> token1 = consumeOneOrZeroOf(typeSet1); 214 if (!token1.isPresent()) { 215 break; 216 } 217 list.add(token1.get()); 218 Optional<IToken> token2 = consumeOneOrZeroOf(typeSet2); 219 if (!token2.isPresent()) { 220 break; 221 } 222 list.add(token2.get()); 223 } 224 return list; 225 } 226 227 /** 228 * Returns <code>true</code> if the current token has any of the given types. 229 */ 230 public boolean isAnyOf(Set<ETokenType> types) { 231 return types.contains(currentType()); 232 } 233 234 /** 235 * Returns <code>true</code> if all tokens have been consumed and parsing is 236 * done. 237 */ 238 public boolean isDone() { 239 return currentToken >= tokens.size(); 240 } 241 242 /** 243 * Returns the text of the current token or <code>null</code> if all tokens have 244 * been consumed. 245 */ 246 public String currentText() { 247 if (isDone()) { 248 return null; 249 } 250 return tokens.get(currentToken).getText(); 251 } 252 253 /** 254 * Returns the type of the current token or <code>null</code> if all tokens have 255 * been consumed. 256 */ 257 public ETokenType currentType() { 258 if (isDone()) { 259 return null; 260 } 261 return tokens.get(currentToken).getType(); 262 } 263 264 /** Returns whether the next tokens correspond to the given sequence */ 265 public boolean continuesWithSequence(ETokenType... tokenTypes) { 266 return !isDone() && TokenStreamUtils.containsSequence(tokens, currentToken, tokens.size() - 1, tokenTypes); 267 } 268 269 /** 270 * Skips to the beginning of the specified sequence or to the end if the 271 * sequence was not found 272 */ 273 public void skipToSequence(ETokenType... tokenTypes) { 274 int index = TokenStreamUtils.firstTokenOfTypeSequence(tokens, currentToken, tokenTypes); 275 if (index != TokenStreamUtils.NOT_FOUND) { 276 currentToken = index; 277 } else { 278 skipToEnd(); 279 } 280 } 281 282 /** 283 * Returns the number of tokens consumed so far. 284 */ 285 public int getConsumedTokenCount() { 286 return currentToken; 287 } 288 289 /** 290 * Reset parser position to the given token index. Current position is 291 * {@link #getConsumedTokenCount()}. 292 */ 293 public void resetToPosition(int newPosition) { 294 assertPositionAllowed(newPosition, tokens); 295 currentToken = newPosition; 296 } 297 298 /** 299 * Asserts that the given index is contained in the list. 300 */ 301 private static void assertPositionAllowed(int index, List<IToken> tokens) throws AssertionError { 302 CCSMAssert.isTrue(index >= 0 && index <= tokens.size(), 303 "The new position is out of range (position: " + index + " range: 0--" + (tokens.size() - 1)); 304 } 305 306 /** {@inheritDoc} */ 307 @Override 308 public String toString() { 309 return "TokenStreamParser " + tokens.subList(currentToken, tokens.size()); 310 } 311 312 /** @see #tokens */ 313 public List<IToken> getTokens() { 314 return tokens; 315 } 316 317 /** 318 * Tries to apply the given token consumer to this {@link TokenStreamParser}. If 319 * the consumer returns {@link Optional#empty()}, resets the parser to the 320 * current position and returns {@link Optional#empty()}. Otherwise, returns the 321 * return value of the consumer. 322 */ 323 public <T> Optional<T> tryConsume(Function<TokenStreamParser, Optional<T>> consumer) { 324 int parserPositionBefore = getConsumedTokenCount(); 325 Optional<T> result = consumer.apply(this); 326 if (!result.isPresent()) { 327 resetToPosition(parserPositionBefore); 328 } 329 return result; 330 } 331 332 /** 333 * Returns the language of the given {@link IncludedTokens} (assuming all have 334 * the same language). 335 */ 336 public ELanguage getTokenLanguage() { 337 return TokenStreamUtils.getLanguage(tokens); 338 } 339 340 /** 341 * Skips any remaining tokens. The next call of {@link #isDone()} will return 342 * <code>true</code>. 343 */ 344 public void skipToEnd() { 345 currentToken = tokens.size(); 346 } 347 348 /** Returns the current token or null iff {@link #isDone()}. */ 349 public IToken currentToken() { 350 if (isDone()) { 351 return null; 352 } 353 return tokens.get(currentToken); 354 } 355 356 /** 357 * Returns whether the token before the current parsing position has the given 358 * type. 359 */ 360 public boolean previousTokenTypeIs(ETokenType type) { 361 if (currentToken == 0) { 362 return false; 363 } 364 return tokens.get(currentToken - 1).getType() == type; 365 } 366}