001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 the ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package eu.cqse.check.framework.shallowparser.languages.base; 018 019import static eu.cqse.check.framework.scanner.ETokenType.LBRACE; 020import static eu.cqse.check.framework.scanner.ETokenType.LBRACK; 021import static eu.cqse.check.framework.scanner.ETokenType.LPAREN; 022import static eu.cqse.check.framework.scanner.ETokenType.RBRACE; 023import static eu.cqse.check.framework.scanner.ETokenType.RBRACK; 024import static eu.cqse.check.framework.scanner.ETokenType.RPAREN; 025import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON; 026 027import java.util.ArrayList; 028import java.util.Arrays; 029import java.util.EnumMap; 030import java.util.List; 031import java.util.Map; 032import java.util.Stack; 033 034import eu.cqse.check.framework.scanner.ETokenType; 035import eu.cqse.check.framework.scanner.IToken; 036import eu.cqse.check.framework.shallowparser.SubTypeNames; 037import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType; 038import eu.cqse.check.framework.shallowparser.framework.ParserState; 039import eu.cqse.check.framework.shallowparser.framework.RecognizerBase; 040import eu.cqse.check.framework.shallowparser.framework.ShallowEntity; 041 042/** 043 * Base class for recognizing statements in a language whose statements are 044 * primarily line-based. It also assumes that a semicolon may always be used to 045 * terminate a statement. 046 */ 047public abstract class LineBasedStatementRecognizerBase<STATE extends Enum<STATE>> extends RecognizerBase<STATE> { 048 049 /** List of TypeScript modifiers. */ 050 private static final List<String> TYPESCRIPT_MODIFIERS = new ArrayList<>( 051 Arrays.asList("static", "protected", "public", "private", "readonly")); 052 053 /** Matched tokens for nesting in complex statements. */ 054 private final Map<ETokenType, ETokenType> nesting_match = new EnumMap<>(ETokenType.class); 055 056 /** Constructor. */ 057 public LineBasedStatementRecognizerBase() { 058 registerNestingMatch(LPAREN, RPAREN); 059 registerNestingMatch(LBRACK, RBRACK); 060 registerNestingMatch(LBRACE, RBRACE); 061 } 062 063 /** 064 * Registers nesting tokens (e.g. braces). When the recognizer is going through 065 * a statement it will check if the current token is of a type registered with 066 * this method. 067 */ 068 protected void registerNestingMatch(ETokenType opening, ETokenType closing) { 069 nesting_match.put(opening, closing); 070 } 071 072 /** 073 * Check whether the given token is a type of closing token for tokens which 074 * might be nested in a statement. 075 * <p> 076 * For example `)` would return true in `a = new ArrayList();`. 077 */ 078 protected boolean isNestedClosingToken(ETokenType token) { 079 return nesting_match.containsValue(token); 080 } 081 082 /** {@inheritDoc} */ 083 @Override 084 protected int matchesLocally(ParserState<STATE> parserState, List<IToken> tokens, int startOffset) { 085 IToken lastToken = null; 086 Stack<ETokenType> expectedClosing = new Stack<>(); 087 088 int nodeStart = startOffset; 089 // taking the modifiers into consideration 090 while (nodeStart > 0 && TYPESCRIPT_MODIFIERS.contains(getEntityName(tokens, nodeStart - 1))) { 091 nodeStart = nodeStart - 1; 092 } 093 094 // create a node here, so we can append function nodes 095 parserState.setNode(new ShallowEntity(getEntityType(), getEntitySubtypeName(), 096 getEntityName(tokens, startOffset), tokens, nodeStart)); 097 098 while (true) { 099 if (startOffset >= tokens.size()) { 100 return startOffset; 101 } 102 103 IToken token = tokens.get(startOffset); 104 ETokenType tokenType = token.getType(); 105 106 if (!expectedClosing.isEmpty() && tokenType == expectedClosing.peek()) { 107 expectedClosing.pop(); 108 } else if (expectedClosing.isEmpty() && tokenType == SEMICOLON) { 109 return startOffset + 1; 110 } else if (expectedClosing.isEmpty() && startsNewStatement(token, lastToken)) { 111 return startOffset; 112 } else if (tokenStartsSubParse(tokenType, tokens, startOffset, expectedClosing)) { 113 int next = parserState.parse(getSubParseState(), tokens, startOffset); 114 if (next == NO_MATCH) { 115 return NO_MATCH; 116 } 117 startOffset = next; 118 lastToken = tokens.get(startOffset - 1); 119 continue; 120 } else if (nesting_match.containsKey(tokenType)) { 121 expectedClosing.push(nesting_match.get(tokenType)); 122 } 123 124 lastToken = token; 125 startOffset += 1; 126 } 127 } 128 129 /** 130 * @return The entity type that is to be created when matching. 131 */ 132 protected EShallowEntityType getEntityType() { 133 return EShallowEntityType.STATEMENT; 134 } 135 136 /** 137 * @return The entity subtype name to be used when creating a node. 138 */ 139 protected String getEntitySubtypeName() { 140 return SubTypeNames.SIMPLE_STATEMENT; 141 } 142 143 /** 144 * @return The name used for the created entity node. Default implementation 145 * uses the text of the first token (the token at position startOffset). 146 */ 147 protected String getEntityName(List<IToken> tokens, int startOffset) { 148 return tokens.get(startOffset).getText(); 149 } 150 151 /** Returns the state to be used for a sub parse. */ 152 protected abstract STATE getSubParseState(); 153 154 /** 155 * Returns true if the token signals to start a sub parse (e.g. embedded 156 * classes, functions, etc.). 157 * 158 * @param tokenType 159 * the type of the current token. 160 * @param tokens 161 * the entire token stream. 162 * @param offset 163 * the offset of the current token in the token stream. 164 * @param expectedClosing 165 * the stack of currently expected closing delimiters 166 */ 167 protected abstract boolean tokenStartsSubParse(ETokenType tokenType, List<IToken> tokens, int offset, 168 Stack<ETokenType> expectedClosing); 169 170 /** Returns true if the given token starts a new statement. */ 171 protected abstract boolean startsNewStatement(IToken token, IToken lastToken); 172}