001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 the ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package eu.cqse.check.framework.shallowparser.languages.rust; 018 019import static eu.cqse.check.framework.scanner.ETokenType.GT; 020import static eu.cqse.check.framework.scanner.ETokenType.IF; 021import static eu.cqse.check.framework.scanner.ETokenType.LBRACE; 022import static eu.cqse.check.framework.scanner.ETokenType.MATCH; 023import static eu.cqse.check.framework.scanner.ETokenType.OR; 024import static eu.cqse.check.framework.scanner.ETokenType.OROR; 025import static eu.cqse.check.framework.scanner.ETokenType.RBRACE; 026import static eu.cqse.check.framework.scanner.ETokenType.RBRACK; 027import static eu.cqse.check.framework.scanner.ETokenType.RPAREN; 028import static eu.cqse.check.framework.scanner.ETokenType.ETokenClass.DELIMITER; 029import static eu.cqse.check.framework.scanner.ETokenType.ETokenClass.OPERATOR; 030import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_EXPRESSION; 031 032import java.util.EnumSet; 033import java.util.List; 034 035import eu.cqse.check.framework.scanner.ETokenType; 036import eu.cqse.check.framework.scanner.ETokenType.ETokenClass; 037import eu.cqse.check.framework.scanner.IToken; 038import eu.cqse.check.framework.shallowparser.TokenStreamUtils; 039import eu.cqse.check.framework.shallowparser.framework.ParserState; 040import eu.cqse.check.framework.shallowparser.framework.RecognizerBase; 041import eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates; 042 043/** 044 * A sub-expression recognizer that finds if, match and anonymous block and 045 * lambda expressions in Rust. 046 */ 047public class RustSubExpressionRecognizer extends RecognizerBase<ERustParserStates> { 048 049 /** 050 * A set of token types that start a sub structure expression like if and match. 051 */ 052 private static final EnumSet<ETokenType> SUB_STRUCTURE_START_TOKENS = EnumSet.of(IF, MATCH); 053 054 /** 055 * A set of token classes that indicate a sub expression block if they are 056 * followed by a opening brace. 057 */ 058 private static final EnumSet<ETokenClass> SUB_BLOCK_INDICATOR_CLASSES = EnumSet.of(OPERATOR, DELIMITER); 059 060 /** 061 * A set of token types that indicate that a following brace is no sub 062 * expression block. 063 */ 064 private static final EnumSet<ETokenType> SUB_BLOCK_EXCLUDED_TOKENS = EnumSet.of(RPAREN, RBRACK, RBRACE, GT); 065 066 /** 067 * A set of token classes that indicate a lambda expression block if they are 068 * followed by a pipe. 069 */ 070 private static final EnumSet<ETokenClass> LAMBDA_INDICATOR_CLASSES = EnumSet.of(OPERATOR, DELIMITER); 071 072 /** 073 * A set of token types that indicate that a following pipe is no lambda. 074 */ 075 private static final EnumSet<ETokenType> LAMBDA_EXCLUDED_TOKENS = EnumSet.of(RPAREN, RBRACK, RBRACE); 076 077 /** A set of token types that start a lambda expression. */ 078 private static final EnumSet<ETokenType> LAMBDA_START_TOKENS = EnumSet.of(OR, OROR); 079 080 /** {@inheritDoc} */ 081 @Override 082 protected int matchesLocally(ParserState<ERustParserStates> parserState, List<IToken> tokens, int startOffset) { 083 if (startOffset < tokens.size()) { 084 if (isSubStructureStart(tokens, startOffset)) { 085 return parserState.parse(IN_EXPRESSION, tokens, startOffset); 086 } else if (isSubBlockStart(tokens, startOffset)) { 087 return parserState.parse(IN_EXPRESSION, tokens, startOffset + 1); 088 } else if (isLambdaStart(tokens, startOffset)) { 089 return parserState.parse(IN_EXPRESSION, tokens, startOffset); 090 } 091 } 092 return NO_MATCH; 093 } 094 095 /** 096 * Returns whether a sub structure starts at the given offset in the given token 097 * list. 098 */ 099 private static boolean isSubStructureStart(List<IToken> tokens, int startOffset) { 100 return SUB_STRUCTURE_START_TOKENS.contains(tokens.get(startOffset).getType()); 101 } 102 103 /** 104 * Returns whether a sub expression block starts at the given offset in the 105 * given token list. The heuristic is to look at the token in front of the 106 * opening brace and check whether it is an operator or delimiter. 107 */ 108 private static boolean isSubBlockStart(List<IToken> tokens, int startOffset) { 109 ETokenType firstType = tokens.get(startOffset).getType(); 110 return !SUB_BLOCK_EXCLUDED_TOKENS.contains(firstType) 111 && SUB_BLOCK_INDICATOR_CLASSES.contains(firstType.getTokenClass()) 112 && TokenStreamUtils.hasTokenTypeSequence(tokens, startOffset + 1, LBRACE); 113 } 114 115 /** 116 * Returns whether a lambda starts at the given offset in the given token list. 117 * The heuristic is to look for | or || tokens that are preceeded by either an 118 * operator or delimiter and thus are no bitwise operators. Some tokens like 119 * closing parenthesis, brackets and braces still indicate that the following 120 * pipe does not start a lambda. 121 */ 122 private static boolean isLambdaStart(List<IToken> tokens, int startOffset) { 123 if (!isLambdaIndicated(tokens, startOffset)) { 124 return false; 125 } 126 127 return startOffset >= 0 && LAMBDA_START_TOKENS.contains(tokens.get(startOffset).getType()); 128 } 129 130 /** 131 * Returns whether the token that preceeds the token at startOffset indicates a 132 * lambda expression. 133 */ 134 private static boolean isLambdaIndicated(List<IToken> tokens, int startOffset) { 135 if (startOffset <= 0) { 136 return false; 137 } 138 ETokenType previousType = tokens.get(startOffset - 1).getType(); 139 return LAMBDA_INDICATOR_CLASSES.contains(previousType.getTokenClass()) 140 && !LAMBDA_EXCLUDED_TOKENS.contains(previousType); 141 } 142 143}