001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.kotlin;
018
019import static eu.cqse.check.framework.scanner.ETokenType.ANDAND;
020import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
021import static eu.cqse.check.framework.scanner.ETokenType.DIV;
022import static eu.cqse.check.framework.scanner.ETokenType.DOT;
023import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
024import static eu.cqse.check.framework.scanner.ETokenType.ELVIS;
025import static eu.cqse.check.framework.scanner.ETokenType.EOF;
026import static eu.cqse.check.framework.scanner.ETokenType.EOL;
027import static eu.cqse.check.framework.scanner.ETokenType.EQEQ;
028import static eu.cqse.check.framework.scanner.ETokenType.GT;
029import static eu.cqse.check.framework.scanner.ETokenType.GTEQ;
030import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
031import static eu.cqse.check.framework.scanner.ETokenType.LT;
032import static eu.cqse.check.framework.scanner.ETokenType.LTEQ;
033import static eu.cqse.check.framework.scanner.ETokenType.MINUS;
034import static eu.cqse.check.framework.scanner.ETokenType.MOD;
035import static eu.cqse.check.framework.scanner.ETokenType.MULT;
036import static eu.cqse.check.framework.scanner.ETokenType.NOTEQ;
037import static eu.cqse.check.framework.scanner.ETokenType.OR;
038import static eu.cqse.check.framework.scanner.ETokenType.OROR;
039import static eu.cqse.check.framework.scanner.ETokenType.PLUS;
040import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
041import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
042import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
043import static eu.cqse.check.framework.scanner.ETokenType.SAFECALL_OPERATOR;
044import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
045
046import java.util.EnumSet;
047import java.util.List;
048
049import eu.cqse.check.framework.scanner.ETokenType;
050import eu.cqse.check.framework.scanner.IToken;
051import eu.cqse.check.framework.shallowparser.framework.ParserState;
052import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
053import eu.cqse.check.framework.shallowparser.framework.RecognizerUtils;
054
055/**
056 * A recognizer that allows to match a Kotlin statement, which potentially spans
057 * across multiple lines
058 */
059public class KotlinStatementSubRecognizer extends RecognizerBase<EKotlinParserStates> {
060
061        /**
062         * All tokens that are valid statement separators.
063         */
064        private static final EnumSet<ETokenType> STATEMENT_SEPARATORS = EnumSet.of(EOL, SEMICOLON, RBRACE, EOF, LT, COMMA,
065                        RPAREN, RBRACK, ELSE);
066
067        /**
068         * Binary operators, which can cause a statement to continue on the next line,
069         * even though a EOL has been found.
070         */
071        private static final EnumSet<ETokenType> BINARY_OPERATORS = EnumSet.of(DOT, SAFECALL_OPERATOR, ELVIS, PLUS, MINUS,
072                        MULT, DIV, MOD, ANDAND, OROR, EQEQ, NOTEQ, GT, LT, LTEQ, GTEQ, OR);
073
074        /** Matches a partial statement, which does only match one line. */
075        private final RecognizerBase<EKotlinParserStates> partialStatementRecognizer;
076
077        /**
078         * Matches the end of a line together with tokens that allow the statement to
079         * continue.
080         */
081        private final RecognizerBase<EKotlinParserStates> statementContinuationRecognizer = new RecognizerBase<EKotlinParserStates>() {
082
083                /**
084                 * Expects to be invoked after a STATEMENT_SEPARATOR has been found. It returns
085                 * NO_MATCH if anything besides EOL is found at the end. In case of EOL if the
086                 * last token of the currently ended line or the first token of the next
087                 * non-empty line is a binary operator and therefore allows the statement to be
088                 * continued it matches to the position of the first token after the operator
089                 * and EOL's.
090                 */
091                @Override
092                protected int matchesLocally(ParserState<EKotlinParserStates> parserState, List<IToken> tokens,
093                                int startOffset) {
094                        if (startOffset < tokens.size() && tokens.get(startOffset).getType() == EOL) {
095                                if (startOffset > 1) {
096                                        IToken lastTokenOnLine = tokens.get(startOffset - 1);
097                                        if (BINARY_OPERATORS.contains(lastTokenOnLine.getType())) {
098                                                return skipEOLs(tokens, startOffset);
099                                        }
100                                }
101
102                                int newOffset = skipEOLs(tokens, startOffset);
103                                if (newOffset >= tokens.size()) {
104                                        return NO_MATCH;
105                                }
106                                IToken tokenOnNextLine = tokens.get(newOffset);
107                                if (tokenOnNextLine.getType() != EOL) {
108                                        if (BINARY_OPERATORS.contains(tokenOnNextLine.getType()) || tokenOnNextLine.getType() == LBRACE) {
109                                                return newOffset;
110                                        }
111                                        return NO_MATCH;
112                                }
113                        }
114
115                        return NO_MATCH;
116                }
117
118                /**
119                 * Skips EOL tokens in the given token list and returns the index of the first
120                 * non-EOL token. Implicitly expects
121                 */
122                private int skipEOLs(List<IToken> tokens, int startOffset) {
123                        for (int i = startOffset; i < tokens.size(); i++) {
124                                if (tokens.get(i).getType() != EOL) {
125                                        return i;
126                                }
127                        }
128                        return tokens.size();
129                }
130
131                /** {@inheritDoc} */
132                @Override
133                protected String getRecognizerStringRepresentation() {
134                        return "statement continuation";
135                }
136
137        };
138
139        /**
140         * Constructor.
141         * 
142         * @param subExpressionRecognizer
143         *            Recognizer to be used to find nested lambdas etc. in the statement
144         * @param openingBrackets
145         *            List of opening brackets, which are skipped
146         * @param closingBrackets
147         *            List of closing brackets, which are skipped
148         */
149        public KotlinStatementSubRecognizer(RecognizerBase<EKotlinParserStates> subExpressionRecognizer,
150                        List<ETokenType> openingBrackets, List<ETokenType> closingBrackets) {
151                RecognizerBase<EKotlinParserStates> genericRecognizer = RecognizerUtils.createRecognizer(start -> {
152                        start.sequence(LT).repeated(KotlinShallowParser.VALID_INSIDE_GENERIC_TOKEN_TYPES).sequence(GT)
153                                        .skipBeforeWithNesting(STATEMENT_SEPARATORS, openingBrackets, closingBrackets,
154                                                        subExpressionRecognizer);
155                        start.sequence(LT).skipBeforeWithNesting(STATEMENT_SEPARATORS, openingBrackets, closingBrackets,
156                                        subExpressionRecognizer);
157                });
158                partialStatementRecognizer = RecognizerUtils.createRecognizer(start -> start
159                                .skipBeforeWithNesting(STATEMENT_SEPARATORS, openingBrackets, closingBrackets, subExpressionRecognizer)
160                                .repeatedSubRecognizer(genericRecognizer).optional(SEMICOLON));
161        }
162
163        /** {@inheritDoc} */
164        @Override
165        protected int matchesLocally(ParserState<EKotlinParserStates> parserState, List<IToken> tokens, int startOffset) {
166                int currentOffset = startOffset;
167
168                while (true) {
169                        int newOffset = partialStatementRecognizer.matches(parserState, tokens, currentOffset);
170                        if (newOffset == NO_MATCH || newOffset == currentOffset) {
171                                return currentOffset;
172                        }
173
174                        currentOffset = newOffset;
175
176                        newOffset = statementContinuationRecognizer.matches(parserState, tokens, currentOffset);
177                        if (newOffset == NO_MATCH || newOffset == currentOffset) {
178                                return currentOffset;
179                        }
180
181                        currentOffset = newOffset;
182                }
183        }
184
185        /** {@inheritDoc} */
186        @Override
187        protected String getRecognizerStringRepresentation() {
188                return super.getRecognizerStringRepresentation() + "[" + partialStatementRecognizer.toString() + ", "
189                                + statementContinuationRecognizer.toString() + "]";
190        }
191}