001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.rust;
018
019import static eu.cqse.check.framework.scanner.ETokenType.GT;
020import static eu.cqse.check.framework.scanner.ETokenType.IF;
021import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
022import static eu.cqse.check.framework.scanner.ETokenType.MATCH;
023import static eu.cqse.check.framework.scanner.ETokenType.OR;
024import static eu.cqse.check.framework.scanner.ETokenType.OROR;
025import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
026import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
027import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
028import static eu.cqse.check.framework.scanner.ETokenType.ETokenClass.DELIMITER;
029import static eu.cqse.check.framework.scanner.ETokenType.ETokenClass.OPERATOR;
030import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_EXPRESSION;
031
032import java.util.EnumSet;
033import java.util.List;
034
035import eu.cqse.check.framework.scanner.ETokenType;
036import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
037import eu.cqse.check.framework.scanner.IToken;
038import eu.cqse.check.framework.shallowparser.TokenStreamUtils;
039import eu.cqse.check.framework.shallowparser.framework.ParserState;
040import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
041import eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates;
042
043/**
044 * A sub-expression recognizer that finds if, match and anonymous block and
045 * lambda expressions in Rust.
046 */
047public class RustSubExpressionRecognizer extends RecognizerBase<ERustParserStates> {
048
049        /**
050         * A set of token types that start a sub structure expression like if and match.
051         */
052        private static final EnumSet<ETokenType> SUB_STRUCTURE_START_TOKENS = EnumSet.of(IF, MATCH);
053
054        /**
055         * A set of token classes that indicate a sub expression block if they are
056         * followed by a opening brace.
057         */
058        private static final EnumSet<ETokenClass> SUB_BLOCK_INDICATOR_CLASSES = EnumSet.of(OPERATOR, DELIMITER);
059
060        /**
061         * A set of token types that indicate that a following brace is no sub
062         * expression block.
063         */
064        private static final EnumSet<ETokenType> SUB_BLOCK_EXCLUDED_TOKENS = EnumSet.of(RPAREN, RBRACK, RBRACE, GT);
065
066        /**
067         * A set of token classes that indicate a lambda expression block if they are
068         * followed by a pipe.
069         */
070        private static final EnumSet<ETokenClass> LAMBDA_INDICATOR_CLASSES = EnumSet.of(OPERATOR, DELIMITER);
071
072        /**
073         * A set of token types that indicate that a following pipe is no lambda.
074         */
075        private static final EnumSet<ETokenType> LAMBDA_EXCLUDED_TOKENS = EnumSet.of(RPAREN, RBRACK, RBRACE);
076
077        /** A set of token types that start a lambda expression. */
078        private static final EnumSet<ETokenType> LAMBDA_START_TOKENS = EnumSet.of(OR, OROR);
079
080        /** {@inheritDoc} */
081        @Override
082        protected int matchesLocally(ParserState<ERustParserStates> parserState, List<IToken> tokens, int startOffset) {
083                if (startOffset < tokens.size()) {
084                        if (isSubStructureStart(tokens, startOffset)) {
085                                return parserState.parse(IN_EXPRESSION, tokens, startOffset);
086                        } else if (isSubBlockStart(tokens, startOffset)) {
087                                return parserState.parse(IN_EXPRESSION, tokens, startOffset + 1);
088                        } else if (isLambdaStart(tokens, startOffset)) {
089                                return parserState.parse(IN_EXPRESSION, tokens, startOffset);
090                        }
091                }
092                return NO_MATCH;
093        }
094
095        /**
096         * Returns whether a sub structure starts at the given offset in the given token
097         * list.
098         */
099        private static boolean isSubStructureStart(List<IToken> tokens, int startOffset) {
100                return SUB_STRUCTURE_START_TOKENS.contains(tokens.get(startOffset).getType());
101        }
102
103        /**
104         * Returns whether a sub expression block starts at the given offset in the
105         * given token list. The heuristic is to look at the token in front of the
106         * opening brace and check whether it is an operator or delimiter.
107         */
108        private static boolean isSubBlockStart(List<IToken> tokens, int startOffset) {
109                ETokenType firstType = tokens.get(startOffset).getType();
110                return !SUB_BLOCK_EXCLUDED_TOKENS.contains(firstType)
111                                && SUB_BLOCK_INDICATOR_CLASSES.contains(firstType.getTokenClass())
112                                && TokenStreamUtils.hasTokenTypeSequence(tokens, startOffset + 1, LBRACE);
113        }
114
115        /**
116         * Returns whether a lambda starts at the given offset in the given token list.
117         * The heuristic is to look for | or || tokens that are preceeded by either an
118         * operator or delimiter and thus are no bitwise operators. Some tokens like
119         * closing parenthesis, brackets and braces still indicate that the following
120         * pipe does not start a lambda.
121         */
122        private static boolean isLambdaStart(List<IToken> tokens, int startOffset) {
123                if (!isLambdaIndicated(tokens, startOffset)) {
124                        return false;
125                }
126
127                return startOffset >= 0 && LAMBDA_START_TOKENS.contains(tokens.get(startOffset).getType());
128        }
129
130        /**
131         * Returns whether the token that preceeds the token at startOffset indicates a
132         * lambda expression.
133         */
134        private static boolean isLambdaIndicated(List<IToken> tokens, int startOffset) {
135                if (startOffset <= 0) {
136                        return false;
137                }
138                ETokenType previousType = tokens.get(startOffset - 1).getType();
139                return LAMBDA_INDICATOR_CLASSES.contains(previousType.getTokenClass())
140                                && !LAMBDA_EXCLUDED_TOKENS.contains(previousType);
141        }
142
143}