/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package eu.cqse.check.framework.shallowparser.languages.python;

import static eu.cqse.check.framework.scanner.ETokenType.AT;
import static eu.cqse.check.framework.scanner.ETokenType.CLASS;
import static eu.cqse.check.framework.scanner.ETokenType.COLON;
import static eu.cqse.check.framework.scanner.ETokenType.DEDENT;
import static eu.cqse.check.framework.scanner.ETokenType.DEF;
import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.ELIF;
import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
import static eu.cqse.check.framework.scanner.ETokenType.EOL;
import static eu.cqse.check.framework.scanner.ETokenType.EXCEPT;
import static eu.cqse.check.framework.scanner.ETokenType.FINALLY;
import static eu.cqse.check.framework.scanner.ETokenType.FOR;
import static eu.cqse.check.framework.scanner.ETokenType.FROM;
import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
import static eu.cqse.check.framework.scanner.ETokenType.IF;
import static eu.cqse.check.framework.scanner.ETokenType.IMPORT;
import static eu.cqse.check.framework.scanner.ETokenType.INDENT;
import static eu.cqse.check.framework.scanner.ETokenType.LAMBDA;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
import static eu.cqse.check.framework.scanner.ETokenType.TRY;
import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
import static eu.cqse.check.framework.scanner.ETokenType.WITH;
import static eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates.ANY;
import static eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates.IN_CLASS;
import static eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates.IN_LAMBDA;

import java.util.Arrays;
import java.util.EnumSet;

import org.conqat.lib.commons.region.Region;

import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
import eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates;

/**
 * Shallow parser for Python.
 * <p>
 * The constructor registers recognizer rules for unmatched indentation,
 * imports, decorators, classes, functions, lambdas and statements. Block
 * bodies are delimited by the {@code INDENT} and {@code DEDENT} tokens
 * delivered by the scanner.
 */
public class PythonShallowParser extends ShallowParserBase<EPythonParserStates> {

    /** The states used in this parser. */
    public enum EPythonParserStates {

        /**
         * Any state apart from in-class states. Typically, any construct can occur at
         * any place.
         */
        ANY,

        /** State applying within a class scope. */
        IN_CLASS,

        /** Within a lambda expression. */
        IN_LAMBDA
    }

    /**
     * Constructor. Uses {@link EPythonParserStates#ANY} as the initial state and
     * registers all rule groups.
     */
    public PythonShallowParser() {
        super(EPythonParserStates.class, EPythonParserStates.ANY);

        // NOTE(review): registration order presumably determines rule priority
        // (the catch-all simple statement rule is registered last) -- confirm
        // against ShallowParserBase before reordering these calls.
        createErrorRules();
        createImportRules();
        createDecoratorRules();
        createClassRules();
        createFunctionRules();
        createLambdaRule();
        createStatementRules();
    }

    /** Creates rules for error handling, i.e. stray indent/dedent tokens. */
    private void createErrorRules() {
        // unmatched indent/dedent: no endNode to keep the node incomplete
        inAnyState().sequence(INDENT).createNode(EShallowEntityType.META, "Unmatched indent");
        inAnyState().sequence(DEDENT).createNode(EShallowEntityType.META, "Unmatched dedent");
    }

    /**
     * Creates parsing rules for imports. A statement starting with
     * {@code import} or {@code from} becomes a META node that extends to the next
     * {@code EOL}.
     */
    private void createImportRules() {
        inAnyState().sequence(EnumSet.of(IMPORT, FROM)).createNode(EShallowEntityType.META, 0).skipTo(EOL).endNode();
    }

    /**
     * Creates parsing rules for decorators, i.e. {@code @} followed by a
     * (possibly dotted) identifier and an optional parenthesized argument list.
     * Lambdas inside the argument list are parsed via
     * {@link #createLambdaSubRecognizer()}.
     */
    private void createDecoratorRules() {
        // NOTE(review): Region(1, -1) presumably selects all matched tokens after
        // the '@' as the node name -- confirm against createNode's contract.
        inAnyState().sequence(AT, IDENTIFIER).repeated(DOT, IDENTIFIER)
                .createNode(EShallowEntityType.META, SubTypeNames.DECORATOR, new Region(1, -1))
                .skipNested(LPAREN, RPAREN, createLambdaSubRecognizer()).endNode();
    }

    /**
     * Creates parsing rules for classes. The class body is parsed in state
     * {@link EPythonParserStates#IN_CLASS}.
     */
    private void createClassRules() {
        RecognizerBase<EPythonParserStates> classAlternative = inAnyState().sequence(CLASS, IDENTIFIER)
                .skipNested(LPAREN, RPAREN).sequence(COLON).createNode(EShallowEntityType.TYPE, SubTypeNames.CLASS, 1);
        addBlockClosingAlternatives(classAlternative, IN_CLASS);
    }

    /**
     * Creates parsing rules for functions. Considers that type hinting could be
     * used for parameters and for the function itself. See
     * https://www.python.org/dev/peps/pep-0484/
     */
    private void createFunctionRules() {
        // skipTo(COLON) instead of sequence(COLON): tokens of a return type hint
        // may appear between the parameter list and the colon.
        RecognizerBase<EPythonParserStates> functionAlternative = inAnyState().sequence(DEF, IDENTIFIER)
                .createNode(EShallowEntityType.METHOD, SubTypeNames.METHOD, 1)
                .skipNested(LPAREN, RPAREN, createLambdaSubRecognizer()).skipTo(COLON);
        addBlockClosingAlternatives(functionAlternative, ANY);
    }

    /**
     * Creates parsing rules for statements: empty statements, block statements,
     * isolated line ends, class attributes and simple statements.
     */
    private void createStatementRules() {
        // empty statement
        inAnyState().sequence(SEMICOLON).createNode(EShallowEntityType.STATEMENT, SubTypeNames.EMPTY_STATEMENT)
                .endNode();

        // block statements; the colon is searched with bracket nesting so that
        // colons inside [], {} and () are skipped
        RecognizerBase<EPythonParserStates> ifAlternative = inAnyState()
                .sequence(EnumSet.of(IF, ELIF, ELSE, TRY, EXCEPT, FINALLY, WHILE, FOR, WITH))
                .createNode(EShallowEntityType.STATEMENT, 0)
                .skipToWithNesting(COLON, Arrays.asList(LBRACK, LBRACE, LPAREN), Arrays.asList(RBRACK, RBRACE, RPAREN));
        addBlockClosingAlternatives(ifAlternative, ANY);

        // remove any isolated EOLs
        inAnyState().sequence(EOL);

        // in class attributes
        inState(IN_CLASS).sequenceBefore(IDENTIFIER).subRecognizer(new PythonAttributeRecognizer(), 1, 1).endNode();

        // simple statement: anything that does not start with def or class
        inAnyState().sequenceBefore(EnumSet.complementOf(EnumSet.of(DEF, CLASS)))
                .subRecognizer(new PythonSimpleStatementRecognizer(), 1, 1).endNode();
    }

    /**
     * Creates a rule for parsing lambdas. The lambda body is parsed once in state
     * {@link EPythonParserStates#IN_LAMBDA}; the node ends before a closing
     * parenthesis, a dedent or a line end.
     */
    private void createLambdaRule() {
        inState(ANY).sequence(LAMBDA).skipTo(COLON).createNode(EShallowEntityType.METHOD, SubTypeNames.LAMBDA)
                .parseOnce(IN_LAMBDA).sequenceBefore(EnumSet.of(RPAREN, DEDENT, EOL)).endNode();
    }

    /**
     * Creates a recognizer that detects a lambda expression and parses into it.
     *
     * @return a recognizer that, positioned before a {@code lambda} token, parses
     *         once in state {@link EPythonParserStates#ANY}.
     */
    private RecognizerBase<EPythonParserStates> createLambdaSubRecognizer() {
        return createRecognizer(start -> start.sequenceBefore(LAMBDA).parseOnce(ANY));
    }

    /**
     * Adds two different rules for closing a block:
     * <ul>
     * <li>Closing a block by finding a dedent</li>
     * <li>Single line that ends with EOL, typically this means multiple statements
     * on one line</li>
     * </ul>
     *
     * @param matchingAlternative
     *            The block recognizer to be closed.
     * @param innerBlockState
     *            The {@link EPythonParserStates} used within the block this method
     *            is closing.
     */
    private static void addBlockClosingAlternatives(RecognizerBase<EPythonParserStates> matchingAlternative,
            EPythonParserStates innerBlockState) {
        // indented block: EOL, INDENT, <body>, DEDENT
        matchingAlternative.sequence(EOL, INDENT).parseUntil(innerBlockState).sequence(DEDENT).endNode();
        // body on the same line as the block header, terminated by EOL
        matchingAlternative.parseUntil(innerBlockState).sequence(EOL).endNode();
    }

    /**
     * {@inheritDoc}
     * <p>
     * In addition to the tokens filtered by the super class, an {@code EOL} token
     * that directly follows another {@code EOL} token is filtered.
     */
    @Override
    protected boolean isFilteredToken(IToken token, IToken previousToken) {
        if (super.isFilteredToken(token, previousToken)) {
            return true;
        }
        // Don't allow double EOLs
        return previousToken != null && previousToken.getType() == EOL && token.getType() == EOL;
    }

}