001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.xtend;
018
019import static eu.cqse.check.framework.scanner.ETokenType.IF;
020import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
021import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
022import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
023import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
024import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
025import static eu.cqse.check.framework.scanner.ETokenType.SWITCH;
026import static eu.cqse.check.framework.scanner.ETokenType.TRY;
027
028import java.util.EnumMap;
029import java.util.EnumSet;
030import java.util.List;
031import java.util.Map;
032import java.util.Stack;
033
034import eu.cqse.check.framework.scanner.ETokenType;
035import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
036import eu.cqse.check.framework.scanner.IToken;
037import eu.cqse.check.framework.shallowparser.framework.ParserState;
038import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
039import eu.cqse.check.framework.shallowparser.languages.xtend.XtendShallowParser.EXtendShallowParserState;
040
041/**
042 * We have to use this recognizer to match single statements in xtend, because
043 * there is no indicator like semicolon or end of line, that marks the end of a
044 * statement. Additionally xtend allows multiple statements in one single line.
045 * <br>
046 * 
047 * The recognizer recognizes specific constructs within a simple statement,
048 * namely: try, switch and if statements.<br>
049 */
050public class XtendSkipToEndOfStatementRecognizer extends RecognizerBase<EXtendShallowParserState> {
051
052        /** Contains the token classes LITERAL and IDENTIFIER. */
053        private final static EnumSet<ETokenClass> LITERAL_OR_IDENTIFIER = EnumSet.of(ETokenClass.LITERAL,
054                        ETokenClass.IDENTIFIER);
055
056        /** Contains the token types VAL and VAR. */
057        private final static EnumSet<ETokenType> VAL_OR_VAR = EnumSet.of(ETokenType.VAL, ETokenType.VAR);
058
059        /** Contains closing parenthesis token types. */
060        private static final EnumSet<ETokenType> CLOSING_PARENTHESIS = EnumSet.of(ETokenType.RPAREN, ETokenType.RBRACK,
061                        ETokenType.RBRACE);
062
063        /** Contains token types THROW and NEW. */
064        private static final EnumSet<ETokenType> THROW_OR_NEW = EnumSet.of(ETokenType.THROW, ETokenType.NEW);
065
066        /** Contains the token classes IDENTIFIER and KEYWORD. */
067        private final static EnumSet<ETokenClass> IDENTIFIER_OR_KEYWORD = EnumSet.of(ETokenClass.IDENTIFIER,
068                        ETokenClass.KEYWORD);
069
070        /**
071         * Contains token types that are allowed to be used in front of a keyword.
072         * Those are EQ, AS and DOT.
073         */
074        private static final EnumSet<ETokenType> ALLOWED_IN_FRONT_OF_KEYWORD = EnumSet.of(ETokenType.EQ,
075                        ETokenType.AS_OPERATOR, ETokenType.DOT);
076
077        /** Matched tokens for nesting in complex Xtend statements. */
078        private final static Map<ETokenType, ETokenType> XTEND_NESTING_MATCH = new EnumMap<ETokenType, ETokenType>(
079                        ETokenType.class);
080
081        static {
082                XTEND_NESTING_MATCH.put(LPAREN, RPAREN);
083                XTEND_NESTING_MATCH.put(LBRACE, RBRACE);
084        }
085
086        /** {@inheritDoc} */
087        @Override
088        protected int matchesLocally(ParserState<EXtendShallowParserState> parserState, List<IToken> tokens, int offset) {
089                if (offset <= 0) {
090                        return NO_MATCH;
091                }
092
093                IToken lastToken = tokens.get(offset - 1);
094                Stack<ETokenType> expectedClosing = new Stack<ETokenType>();
095
096                while (true) {
097                        if (offset >= tokens.size()) {
098                                return offset;
099                        }
100
101                        IToken token = tokens.get(offset);
102                        ETokenType tokenType = token.getType();
103
104                        if (!expectedClosing.isEmpty() && tokenType == expectedClosing.peek()) {
105                                expectedClosing.pop();
106                        } else if (expectedClosing.isEmpty() && tokenType == SEMICOLON) {
107                                return offset + 1;
108                        } else if (expectedClosing.isEmpty() && startsNewStatement(token, lastToken)) {
109                                return offset;
110                        } else if (XTEND_NESTING_MATCH.containsKey(tokenType)) {
111                                expectedClosing.push(XTEND_NESTING_MATCH.get(tokenType));
112                        } else if (tokenType == ETokenType.LBRACK && lastToken.getType() == ETokenType.HASH_OPERATOR) {
113                                expectedClosing.push(ETokenType.RBRACK);
114                        } else {
115                                int next = startSubParse(parserState, tokens, offset, tokenType);
116                                if (next == NO_MATCH) {
117                                        return NO_MATCH;
118                                }
119                                if (next != offset) {
120                                        offset = next;
121                                        lastToken = tokens.get(offset - 1);
122                                        continue;
123                                }
124                        }
125
126                        lastToken = token;
127                        offset += 1;
128                }
129        }
130
131        /**
132         * Checks if a new statement is about to start. This means, that
133         * <code>lastToken</code> is part of the statement and <code>token</code> is
134         * not. <br>
135         * Defaults to false.
136         */
137        private static boolean startsNewStatement(IToken token, IToken lastToken) {
138                ETokenType tokenType = token.getType();
139                ETokenType lastTokenType = lastToken.getType();
140
141                ETokenClass tokenClass = tokenType.getTokenClass();
142                ETokenClass lastTokenClass = lastTokenType.getTokenClass();
143
144                if (VAL_OR_VAR.contains(tokenType)) {
145                        return true;
146                }
147
148                if (VAL_OR_VAR.contains(lastTokenType)) {
149                        return false;
150                }
151
152                if (LITERAL_OR_IDENTIFIER.contains(tokenClass) && LITERAL_OR_IDENTIFIER.contains(lastTokenClass)) {
153                        return true;
154                }
155
156                if (CLOSING_PARENTHESIS.contains(lastTokenType) && LITERAL_OR_IDENTIFIER.contains(tokenClass)) {
157                        return true;
158                }
159
160                if (CLOSING_PARENTHESIS.contains(tokenType)) {
161                        return true;
162                }
163
164                if (LITERAL_OR_IDENTIFIER.contains(lastTokenClass) && tokenType == ETokenType.LBRACE) {
165                        return true;
166                }
167
168                // Treat return explicitly, to allow cases like 'return null'
169                if (lastTokenType == ETokenType.RETURN
170                                && (LITERAL_OR_IDENTIFIER.contains(tokenClass) || tokenClass == ETokenClass.KEYWORD)) {
171                        return false;
172                }
173
174                if (THROW_OR_NEW.contains(lastTokenType) && (IDENTIFIER_OR_KEYWORD.contains(tokenClass))) {
175                        return false;
176                }
177
178                if (lastTokenClass == ETokenClass.KEYWORD && LITERAL_OR_IDENTIFIER.contains(tokenClass)) {
179                        return true;
180                }
181
182                // RETURN, THROW, and NEW have already been handled.
183                return !ALLOWED_IN_FRONT_OF_KEYWORD.contains(lastTokenType) && lastTokenClass != ETokenClass.OPERATOR
184                                && tokenClass == ETokenClass.KEYWORD;
185        }
186
187        /**
188         * Starts a subparser, if tokenType equals TRY, SWITCH or IF. Also starts to
189         * subparse in case of a lambda expression.
190         * 
191         * @return new startOffset
192         */
193        private static int startSubParse(ParserState<EXtendShallowParserState> parserState, List<IToken> tokens,
194                        int startOffset, ETokenType tokenType) {
195                if (tokenType == TRY || tokenType == SWITCH || tokenType == IF) {
196                        return parserState.parse(EXtendShallowParserState.IN_METHOD, tokens, startOffset);
197                } else if (tokenType == ETokenType.LBRACK && tokens.get(startOffset - 1).getType() != ETokenType.HASH_OPERATOR
198                                && tokens.get(startOffset + 1).getType() != ETokenType.RBRACK) {
199                        return parserState.parse(EXtendShallowParserState.IN_LAMBDA, tokens, startOffset);
200                }
201                return startOffset;
202        }
203}