/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package eu.cqse.check.framework.shallowparser.languages.kotlin;

import static eu.cqse.check.framework.scanner.ETokenType.ANDAND;
import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
import static eu.cqse.check.framework.scanner.ETokenType.DIV;
import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
import static eu.cqse.check.framework.scanner.ETokenType.ELVIS;
import static eu.cqse.check.framework.scanner.ETokenType.EOF;
import static eu.cqse.check.framework.scanner.ETokenType.EOL;
import static eu.cqse.check.framework.scanner.ETokenType.EQEQ;
import static eu.cqse.check.framework.scanner.ETokenType.GT;
import static eu.cqse.check.framework.scanner.ETokenType.GTEQ;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LT;
import static eu.cqse.check.framework.scanner.ETokenType.LTEQ;
import static eu.cqse.check.framework.scanner.ETokenType.MINUS;
import static eu.cqse.check.framework.scanner.ETokenType.MOD;
import static eu.cqse.check.framework.scanner.ETokenType.MULT;
import static eu.cqse.check.framework.scanner.ETokenType.NOTEQ;
import static eu.cqse.check.framework.scanner.ETokenType.OR;
import static eu.cqse.check.framework.scanner.ETokenType.OROR;
import static eu.cqse.check.framework.scanner.ETokenType.PLUS;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SAFECALL_OPERATOR;
import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;

import java.util.EnumSet;
import java.util.List;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.framework.ParserState;
import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
import eu.cqse.check.framework.shallowparser.framework.RecognizerUtils;

/**
 * A recognizer that matches a Kotlin statement, which potentially spans
 * multiple lines.
 * <p>
 * Matching alternates between a partial statement (the part of the statement up
 * to the next statement separator) and a statement continuation (an end of line
 * that is preceded or followed by a token which lets the statement go on). The
 * alternation stops as soon as one of the two steps fails or makes no progress.
 */
public class KotlinStatementSubRecognizer extends RecognizerBase<EKotlinParserStates> {

    /**
     * All tokens that are valid statement separators. {@link ETokenType#LT} is
     * part of this set so that skipping stops in front of every {@code <}; the
     * token is then consumed by the generic recognizer built in the constructor.
     */
    private static final EnumSet<ETokenType> STATEMENT_SEPARATORS = EnumSet.of(EOL, SEMICOLON, RBRACE, EOF, LT, COMMA,
            RPAREN, RBRACK, ELSE);

    /**
     * Binary operators, which can cause a statement to continue on the next line,
     * even though an EOL has been found.
     */
    private static final EnumSet<ETokenType> BINARY_OPERATORS = EnumSet.of(DOT, SAFECALL_OPERATOR, ELVIS, PLUS, MINUS,
            MULT, DIV, MOD, ANDAND, OROR, EQEQ, NOTEQ, GT, LT, LTEQ, GTEQ, OR);

    /** Matches a partial statement, i.e. the part of a statement on one line. */
    private final RecognizerBase<EKotlinParserStates> partialStatementRecognizer;

    /**
     * Matches the end of a line together with tokens that allow the statement to
     * continue.
     */
    private final RecognizerBase<EKotlinParserStates> statementContinuationRecognizer = new RecognizerBase<EKotlinParserStates>() {

        /**
         * Expects to be invoked at the position where a partial statement ended.
         * Returns {@link #NO_MATCH} unless the token at <code>startOffset</code> is
         * an EOL. For an EOL, the statement is considered to continue if
         * <ul>
         * <li>the token directly in front of the EOL is a binary operator, or</li>
         * <li>the first non-EOL token after it is a binary operator or an opening
         * brace.</li>
         * </ul>
         * In both cases the returned offset is the index of the first non-EOL token
         * after <code>startOffset</code>. For a trailing operator this may be
         * <code>tokens.size()</code> if only EOL tokens follow.
         */
        @Override
        protected int matchesLocally(ParserState<EKotlinParserStates> parserState, List<IToken> tokens,
                int startOffset) {
            if (startOffset >= tokens.size() || tokens.get(startOffset).getType() != EOL) {
                return NO_MATCH;
            }

            int nextLineOffset = skipEOLs(tokens, startOffset);

            // Trailing operator: the guard only has to ensure that index
            // startOffset - 1 exists, so the token at index 0 is inspected as well.
            if (startOffset > 0 && BINARY_OPERATORS.contains(tokens.get(startOffset - 1).getType())) {
                return nextLineOffset;
            }

            if (nextLineOffset >= tokens.size()) {
                // Only EOL tokens remain, nothing to continue with.
                return NO_MATCH;
            }

            // Leading operator or opening brace on the next non-empty line. The
            // token itself is not consumed here.
            ETokenType nextLineTokenType = tokens.get(nextLineOffset).getType();
            if (BINARY_OPERATORS.contains(nextLineTokenType) || nextLineTokenType == LBRACE) {
                return nextLineOffset;
            }
            return NO_MATCH;
        }

        /**
         * Skips EOL tokens in the given token list, beginning at
         * <code>startOffset</code>, and returns the index of the first non-EOL
         * token. Returns <code>tokens.size()</code> if only EOL tokens remain.
         */
        private int skipEOLs(List<IToken> tokens, int startOffset) {
            int offset = startOffset;
            while (offset < tokens.size() && tokens.get(offset).getType() == EOL) {
                offset++;
            }
            return offset;
        }

        /** {@inheritDoc} */
        @Override
        protected String getRecognizerStringRepresentation() {
            return "statement continuation";
        }

    };

    /**
     * Constructor.
     *
     * @param subExpressionRecognizer
     *            Recognizer to be used to find nested lambdas etc. in the statement
     * @param openingBrackets
     *            List of opening brackets, which are skipped
     * @param closingBrackets
     *            List of closing brackets, which are skipped
     */
    public KotlinStatementSubRecognizer(RecognizerBase<EKotlinParserStates> subExpressionRecognizer,
            List<ETokenType> openingBrackets, List<ETokenType> closingBrackets) {
        // Handles a '<' at which the partial statement stopped (LT is a statement
        // separator). Two rules are registered on the same start state:
        // presumably the first covers generic argument lists and the second a
        // plain less-than comparison -- verify against RecognizerUtils.
        RecognizerBase<EKotlinParserStates> genericRecognizer = RecognizerUtils.createRecognizer(start -> {
            start.sequence(LT).repeated(KotlinShallowParser.VALID_INSIDE_GENERIC_TOKEN_TYPES).sequence(GT)
                    .skipBeforeWithNesting(STATEMENT_SEPARATORS, openingBrackets, closingBrackets,
                            subExpressionRecognizer);
            start.sequence(LT).skipBeforeWithNesting(STATEMENT_SEPARATORS, openingBrackets, closingBrackets,
                    subExpressionRecognizer);
        });

        // Skip up to the next separator, consume any number of '<' sections and
        // finally an optional semicolon.
        partialStatementRecognizer = RecognizerUtils.createRecognizer(start -> start
                .skipBeforeWithNesting(STATEMENT_SEPARATORS, openingBrackets, closingBrackets, subExpressionRecognizer)
                .repeatedSubRecognizer(genericRecognizer).optional(SEMICOLON));
    }

    /**
     * Repeatedly matches a partial statement followed by a statement
     * continuation. Returns the offset reached by the last step that succeeded
     * and made progress; this is <code>startOffset</code> if not even the first
     * partial statement could be matched. This method itself never returns
     * {@link #NO_MATCH}.
     */
    @Override
    protected int matchesLocally(ParserState<EKotlinParserStates> parserState, List<IToken> tokens, int startOffset) {
        int currentOffset = startOffset;

        while (true) {
            int newOffset = partialStatementRecognizer.matches(parserState, tokens, currentOffset);
            if (newOffset == NO_MATCH || newOffset == currentOffset) {
                return currentOffset;
            }
            currentOffset = newOffset;

            newOffset = statementContinuationRecognizer.matches(parserState, tokens, currentOffset);
            if (newOffset == NO_MATCH || newOffset == currentOffset) {
                return currentOffset;
            }
            currentOffset = newOffset;
        }
    }

    /** {@inheritDoc} */
    @Override
    protected String getRecognizerStringRepresentation() {
        return super.getRecognizerStringRepresentation() + "[" + partialStatementRecognizer + ", "
                + statementContinuationRecognizer + "]";
    }
}