001/*-------------------------------------------------------------------------+
002|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.python;
018
019import static eu.cqse.check.framework.scanner.ETokenType.AT;
020import static eu.cqse.check.framework.scanner.ETokenType.CLASS;
021import static eu.cqse.check.framework.scanner.ETokenType.COLON;
022import static eu.cqse.check.framework.scanner.ETokenType.DEDENT;
023import static eu.cqse.check.framework.scanner.ETokenType.DEF;
024import static eu.cqse.check.framework.scanner.ETokenType.DOT;
025import static eu.cqse.check.framework.scanner.ETokenType.ELIF;
026import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
027import static eu.cqse.check.framework.scanner.ETokenType.EOL;
028import static eu.cqse.check.framework.scanner.ETokenType.EXCEPT;
029import static eu.cqse.check.framework.scanner.ETokenType.FINALLY;
030import static eu.cqse.check.framework.scanner.ETokenType.FOR;
031import static eu.cqse.check.framework.scanner.ETokenType.FROM;
032import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
033import static eu.cqse.check.framework.scanner.ETokenType.IF;
034import static eu.cqse.check.framework.scanner.ETokenType.IMPORT;
035import static eu.cqse.check.framework.scanner.ETokenType.INDENT;
036import static eu.cqse.check.framework.scanner.ETokenType.LAMBDA;
037import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
038import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
039import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
040import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
041import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
042import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
043import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
044import static eu.cqse.check.framework.scanner.ETokenType.TRY;
045import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
046import static eu.cqse.check.framework.scanner.ETokenType.WITH;
047import static eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates.ANY;
048import static eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates.IN_CLASS;
049import static eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates.IN_LAMBDA;
050
051import java.util.Arrays;
052import java.util.EnumSet;
053
054import org.conqat.lib.commons.region.Region;
055
056import eu.cqse.check.framework.scanner.IToken;
057import eu.cqse.check.framework.shallowparser.SubTypeNames;
058import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
059import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
060import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
061import eu.cqse.check.framework.shallowparser.languages.python.PythonShallowParser.EPythonParserStates;
062
063/**
064 * Shallow parser for Python.
065 */
066public class PythonShallowParser extends ShallowParserBase<EPythonParserStates> {
067
068        /** The states used in this parser. */
069        public enum EPythonParserStates {
070
071                /**
072                 * Any state apart from in-class states. Typically, any construct can occur at
073                 * any place.
074                 */
075                ANY,
076
077                /** State applying within a class scope. */
078                IN_CLASS,
079
080                /** Within a lambda expression. */
081                IN_LAMBDA
082        }
083
084        /** Constructor. */
085        public PythonShallowParser() {
086                super(EPythonParserStates.class, EPythonParserStates.ANY);
087
088                createErrorRules();
089                createImportRules();
090                createDecoratorRules();
091                createClassRules();
092                createFunctionRules();
093                createLambdaRule();
094                createStatementRules();
095        }
096
097        /** Create rules for handling error handling. */
098        private void createErrorRules() {
099                // unmatched indent/dedent: no endNode to keep the node incomplete
100                inAnyState().sequence(INDENT).createNode(EShallowEntityType.META, "Unmatched indent");
101                inAnyState().sequence(DEDENT).createNode(EShallowEntityType.META, "Unmatched dedent");
102        }
103
104        /** Creates parsing rules for imports. */
105        private void createImportRules() {
106                inAnyState().sequence(EnumSet.of(IMPORT, FROM)).createNode(EShallowEntityType.META, 0).skipTo(EOL).endNode();
107        }
108
109        /** Creates parsing rules for decorators. */
110        private void createDecoratorRules() {
111                inAnyState().sequence(AT, IDENTIFIER).repeated(DOT, IDENTIFIER)
112                                .createNode(EShallowEntityType.META, SubTypeNames.DECORATOR, new Region(1, -1))
113                                .skipNested(LPAREN, RPAREN, createLambdaSubRecognizer()).endNode();
114        }
115
116        /** Creates parsing rules for classes. */
117        private void createClassRules() {
118                RecognizerBase<EPythonParserStates> classAlternative = inAnyState().sequence(CLASS, IDENTIFIER)
119                                .skipNested(LPAREN, RPAREN).sequence(COLON).createNode(EShallowEntityType.TYPE, SubTypeNames.CLASS, 1);
120                addBlockClosingAlternatives(classAlternative, IN_CLASS);
121        }
122
123        /**
124         * Creates parsing rules for functions. Considers that type hinting could be
125         * used for parameters and a function too. See
126         * https://www.python.org/dev/peps/pep-0484/
127         */
128        private void createFunctionRules() {
129                RecognizerBase<EPythonParserStates> functionAlternative = inAnyState().sequence(DEF, IDENTIFIER)
130                                .createNode(EShallowEntityType.METHOD, SubTypeNames.METHOD, 1)
131                                .skipNested(LPAREN, RPAREN, createLambdaSubRecognizer()).skipTo(COLON);
132                addBlockClosingAlternatives(functionAlternative, ANY);
133        }
134
135        /** Creates parsing rules for statements. */
136        private void createStatementRules() {
137                // empty statement
138                inAnyState().sequence(SEMICOLON).createNode(EShallowEntityType.STATEMENT, SubTypeNames.EMPTY_STATEMENT)
139                                .endNode();
140
141                // block statements
142                RecognizerBase<EPythonParserStates> ifAlternative = inAnyState()
143                                .sequence(EnumSet.of(IF, ELIF, ELSE, TRY, EXCEPT, FINALLY, WHILE, FOR, WITH))
144                                .createNode(EShallowEntityType.STATEMENT, 0)
145                                .skipToWithNesting(COLON, Arrays.asList(LBRACK, LBRACE, LPAREN), Arrays.asList(RBRACK, RBRACE, RPAREN));
146                addBlockClosingAlternatives(ifAlternative, ANY);
147
148                // remove any isolated EOLs
149                inAnyState().sequence(EOL);
150
151                // in class attributes
152                inState(IN_CLASS).sequenceBefore(IDENTIFIER).subRecognizer(new PythonAttributeRecognizer(), 1, 1).endNode();
153
154                // simple statement
155                inAnyState().sequenceBefore(EnumSet.complementOf(EnumSet.of(DEF, CLASS)))
156                                .subRecognizer(new PythonSimpleStatementRecognizer(), 1, 1).endNode();
157        }
158
159        /** Creates a rule for parsing lambdas. */
160        private void createLambdaRule() {
161                inState(ANY).sequence(LAMBDA).skipTo(COLON).createNode(EShallowEntityType.METHOD, SubTypeNames.LAMBDA)
162                                .parseOnce(IN_LAMBDA).sequenceBefore(EnumSet.of(RPAREN, DEDENT, EOL)).endNode();
163        }
164
165        /**
166         * Creates a recognizer that detects a lambda expression and parses into it.
167         */
168        private RecognizerBase<EPythonParserStates> createLambdaSubRecognizer() {
169                return createRecognizer(start -> start.sequenceBefore(LAMBDA).parseOnce(ANY));
170        }
171
172        /**
173         * Adds two different rules for closing a block:
174         * <ul>
175         * <li>Closing a block by finding a dedent</li>
176         * <li>Single line that ends with EOL, typically this means multiple statements
177         * on one line</li>
178         * </ul>
179         * 
180         * @param matchingAlternative
181         *            The block recognizer to be closed.
182         * @param innerBlockState
183         *            The {@link EPythonParserStates} used within the block this method
184         *            is closing.
185         */
186        private static void addBlockClosingAlternatives(RecognizerBase<EPythonParserStates> matchingAlternative,
187                        EPythonParserStates innerBlockState) {
188                matchingAlternative.sequence(EOL, INDENT).parseUntil(innerBlockState).sequence(DEDENT).endNode();
189                matchingAlternative.parseUntil(innerBlockState).sequence(EOL).endNode();
190        }
191
192        /** {@inheritDoc} */
193        @Override
194        protected boolean isFilteredToken(IToken token, IToken previousToken) {
195                if (super.isFilteredToken(token, previousToken)) {
196                        return true;
197                }
198                // Don't allow double EOLs
199                return previousToken != null && previousToken.getType() == EOL && token.getType() == EOL;
200        }
201
202}