001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.java;
018
019import static eu.cqse.check.framework.scanner.ETokenType.ABSTRACT;
020import static eu.cqse.check.framework.scanner.ETokenType.ANNOTATION_INTERFACE;
021import static eu.cqse.check.framework.scanner.ETokenType.ARROW;
022import static eu.cqse.check.framework.scanner.ETokenType.ASSERT;
023import static eu.cqse.check.framework.scanner.ETokenType.AT_OPERATOR;
024import static eu.cqse.check.framework.scanner.ETokenType.BOOLEAN;
025import static eu.cqse.check.framework.scanner.ETokenType.BREAK;
026import static eu.cqse.check.framework.scanner.ETokenType.BYTE;
027import static eu.cqse.check.framework.scanner.ETokenType.CASE;
028import static eu.cqse.check.framework.scanner.ETokenType.CHAR;
029import static eu.cqse.check.framework.scanner.ETokenType.CLASS;
030import static eu.cqse.check.framework.scanner.ETokenType.COLON;
031import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
032import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE;
033import static eu.cqse.check.framework.scanner.ETokenType.DEFAULT;
034import static eu.cqse.check.framework.scanner.ETokenType.DOT;
035import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE;
036import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
037import static eu.cqse.check.framework.scanner.ETokenType.ENUM;
038import static eu.cqse.check.framework.scanner.ETokenType.FINAL;
039import static eu.cqse.check.framework.scanner.ETokenType.FINALLY;
040import static eu.cqse.check.framework.scanner.ETokenType.FLOAT;
041import static eu.cqse.check.framework.scanner.ETokenType.FOR;
042import static eu.cqse.check.framework.scanner.ETokenType.GOTO;
043import static eu.cqse.check.framework.scanner.ETokenType.GT;
044import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
045import static eu.cqse.check.framework.scanner.ETokenType.IMPORT;
046import static eu.cqse.check.framework.scanner.ETokenType.INT;
047import static eu.cqse.check.framework.scanner.ETokenType.INTERFACE;
048import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
049import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
050import static eu.cqse.check.framework.scanner.ETokenType.LONG;
051import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
052import static eu.cqse.check.framework.scanner.ETokenType.LT;
053import static eu.cqse.check.framework.scanner.ETokenType.MINUSMINUS;
054import static eu.cqse.check.framework.scanner.ETokenType.NATIVE;
055import static eu.cqse.check.framework.scanner.ETokenType.NEW;
056import static eu.cqse.check.framework.scanner.ETokenType.PACKAGE;
057import static eu.cqse.check.framework.scanner.ETokenType.PLUSPLUS;
058import static eu.cqse.check.framework.scanner.ETokenType.PRIVATE;
059import static eu.cqse.check.framework.scanner.ETokenType.PROTECTED;
060import static eu.cqse.check.framework.scanner.ETokenType.PUBLIC;
061import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
062import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
063import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
064import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
065import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
066import static eu.cqse.check.framework.scanner.ETokenType.SHORT;
067import static eu.cqse.check.framework.scanner.ETokenType.STATIC;
068import static eu.cqse.check.framework.scanner.ETokenType.SUPER;
069import static eu.cqse.check.framework.scanner.ETokenType.SWITCH;
070import static eu.cqse.check.framework.scanner.ETokenType.SYNCHRONIZED;
071import static eu.cqse.check.framework.scanner.ETokenType.THIS;
072import static eu.cqse.check.framework.scanner.ETokenType.THROW;
073import static eu.cqse.check.framework.scanner.ETokenType.TRANSIENT;
074import static eu.cqse.check.framework.scanner.ETokenType.VOID;
075import static eu.cqse.check.framework.scanner.ETokenType.VOLATILE;
076import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
077import static eu.cqse.check.framework.shallowparser.SubTypeNames.ANNOTATION;
078import static eu.cqse.check.framework.shallowparser.SubTypeNames.CONSTRUCTOR;
079import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.TYPE;
080import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_ENUM;
081import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_EXPRESSION;
082import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_METHOD;
083import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_TYPE;
084import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.TOP_LEVEL;
085
086import java.util.EnumSet;
087
088import org.conqat.lib.commons.region.Region;
089
090import eu.cqse.check.framework.scanner.ETokenType;
091import eu.cqse.check.framework.shallowparser.SubTypeNames;
092import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
093import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
094import eu.cqse.check.framework.shallowparser.languages.base.CStyleShallowParserBase;
095import eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates;
096
097/**
098 * Shallow parser for Java.
099 * <p>
100 * What this parser does and does not:
101 * <ul>
102 * <li>The parser recognizes types (classes, enums, interfaces), methods and
103 * attributes, and individual statements.</li>
104 * <li>It recognizes the nesting of statements (e.g. in loops), but does not
105 * parse into the statements. For example, it recognizes an if-statement and
106 * provides the list of sub-statements, but does not provide direct access to
107 * the if-condition.</li>
108 * <li>Import and package statements are parsed as meta information.</li>
109 * <li>Annotations are recognized as meta information, but only annotations at
110 * types and methods. Annotations at parameters are not parsed, as the parser
111 * does not parse into the parameter list of methods.</li>
112 * <li>The parser does not recognize anonymous classes. These are treated as a
113 * single long statement or attribute. Inner classes, however, are parsed
114 * correctly.</li>
115 * <li>The parser can deal with multiple classes in a single file.</li>
116 * </ul>
117 */
118public class JavaShallowParser extends CStyleShallowParserBase {
119
120        /** {@inheritDoc} */
121        @Override
122        protected void createMetaRules() {
123                // imports and package
124                inAnyState().sequence(IMPORT, STATIC).skipTo(SEMICOLON)
125                                .createNode(EShallowEntityType.META, SubTypeNames.STATIC_IMPORT, new Region(2, -2)).endNode();
126                inAnyState().sequence(EnumSet.of(IMPORT, PACKAGE)).skipTo(SEMICOLON)
127                                .createNode(EShallowEntityType.META, 0, new Region(1, -2)).endNode();
128
129                // annotations; the spec allows both whitespace and comments between the
130                // '@' and the identifier, but as we filter comments before-hand, this
131                // is not an issue.
132                inAnyState().sequence(AT_OPERATOR, IDENTIFIER).repeated(DOT, IDENTIFIER)
133                                .createNode(EShallowEntityType.META, ANNOTATION, new Region(1, -1)).skipNested(LPAREN, RPAREN)
134                                .endNode();
135
136                super.createMetaRules();
137        }
138
139        @Override
140        protected void createTypeRules() {
141                createEnumTypeRule();
142                super.createTypeRules();
143        }
144
145        /**
146         * Enums, like classes, can be declared top-level or nested. At the head of the
147         * enum type are its enum literal declarations. This is implemented by the
148         * change of the parser to the <code>IN_ENUM</code> state.
149         */
150        private void createEnumTypeRule() {
151                RecognizerBase<EGenericParserStates> typeNode = inState(TOP_LEVEL, IN_TYPE).repeated(getTypeModifier())
152                                .sequence(ENUM, IDENTIFIER).createNode(TYPE, SubTypeNames.ENUM, -1);
153                RecognizerBase<EGenericParserStates> inEnumRule = typeNode.skipTo(LBRACE).parseUntil(IN_ENUM);
154                inEnumRule.sequence(SEMICOLON).parseUntil(IN_TYPE).sequence(RBRACE).endNode();
155                inEnumRule.sequence(RBRACE).endNode();
156        }
157
158        /** {@inheritDoc} */
159        @Override
160        protected EnumSet<ETokenType> getTypeModifier() {
161                return EnumSet.of(PUBLIC, PROTECTED, PRIVATE, ABSTRACT, STATIC, FINAL);
162        }
163
164        /** {@inheritDoc} */
165        @Override
166        protected EnumSet<ETokenType> getTypeKeywords() {
167                return EnumSet.of(CLASS, INTERFACE, ANNOTATION_INTERFACE);
168        }
169
170        /** {@inheritDoc} */
171        @Override
172        protected void createClassElementsRules() {
173                createMethodRule();
174
175                // Enum literals
176                RecognizerBase<EGenericParserStates> enumLiteral = inState(IN_ENUM).markStart().sequence(IDENTIFIER)
177                                .skipNested(LPAREN, RPAREN).sequenceBefore(EnumSet.of(SEMICOLON, COMMA, LBRACE, RBRACE))
178                                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ENUM_LITERAL, 0);
179                enumLiteral.sequence(LBRACE).parseUntil(IN_TYPE).sequence(RBRACE).optional(COMMA).endNode();
180                enumLiteral.optional(COMMA).endNode();
181
182                createConstructorRule();
183
184                // attributes (must be after method, as this would also match methods)
185                typePatternInState(IN_TYPE).sequence(IDENTIFIER).createNode(EShallowEntityType.ATTRIBUTE, "attribute", -1)
186                                .skipToWithNesting(SEMICOLON, LBRACE, RBRACE, getSubExpressionRecognizer()).endNode();
187
188                // static initializer
189                inState(IN_TYPE).sequence(STATIC, LBRACE).createNode(EShallowEntityType.METHOD, "static initializer", "<sinit>")
190                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
191
192                // non-static initializer
193                inState(IN_TYPE).sequence(LBRACE).createNode(EShallowEntityType.METHOD, "non-static initializer", "<init>")
194                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
195        }
196
197        /** Recognizes methods. */
198        private void createMethodRule() {
199                inState(IN_TYPE).sequence(DEFAULT).skipBefore(IDENTIFIER, LPAREN).markStart().sequence(IDENTIFIER, LPAREN)
200                                .skipToWithNesting(RPAREN, LPAREN, RPAREN).skipTo(LBRACE)
201                                .createNode(EShallowEntityType.METHOD, "default method", 0).parseUntil(IN_METHOD).sequence(RBRACE)
202                                .endNode();
203
204                RecognizerBase<EGenericParserStates> methodAlternative = typePatternInState(IN_TYPE).markStart()
205                                .sequence(IDENTIFIER, LPAREN).skipToWithNesting(RPAREN, LPAREN, RPAREN)
206                                .skipBefore(EnumSet.of(LBRACE, SEMICOLON, DEFAULT));
207                methodAlternative.sequence(LBRACE).createNode(EShallowEntityType.METHOD, "method", 0).parseUntil(IN_METHOD)
208                                .sequence(RBRACE).endNode();
209                // annotations
210                methodAlternative.sequence(DEFAULT).createNode(EShallowEntityType.METHOD, SubTypeNames.ABSTRACT, 0)
211                                .skipToWithNesting(SEMICOLON, LBRACE, RBRACE).endNode();
212                methodAlternative.sequence(SEMICOLON).createNode(EShallowEntityType.METHOD, SubTypeNames.ABSTRACT, 0).endNode();
213        }
214
215        /**
216         * Recognizes constructors. Enum literals are recognized in the
217         * {@link EGenericParserStates#IN_ENUM} state
218         */
219        private void createConstructorRule() {
220                inState(IN_TYPE).optional(EnumSet.of(PUBLIC, PRIVATE, PROTECTED)).markStart().sequence(IDENTIFIER, LPAREN)
221                                .skipToWithNesting(RPAREN, LPAREN, RPAREN).skipTo(LBRACE)
222                                .createNode(EShallowEntityType.METHOD, CONSTRUCTOR, 0).parseUntil(IN_METHOD).sequence(RBRACE).endNode();
223        }
224
225        /** {@inheritDoc} */
226        @Override
227        protected EnumSet<ETokenType> getSimpleBlockKeywordsWithParentheses() {
228                return EnumSet.of(WHILE, FOR, SWITCH, SYNCHRONIZED);
229        }
230
231        /** {@inheritDoc} */
232        @Override
233        protected EnumSet<ETokenType> getSimpleBlockKeywordsWithoutParentheses() {
234                return EnumSet.of(ELSE, FINALLY);
235        }
236
237        /** {@inheritDoc} */
238        @Override
239        protected EnumSet<ETokenType> getStatementStartTokens() {
240                return EnumSet.of(NEW, BREAK, CONTINUE, RETURN, ASSERT, FINAL, GOTO, SUPER, THIS, THROW, LPAREN, PLUSPLUS,
241                                MINUSMINUS, IDENTIFIER);
242        }
243
244        /** {@inheritDoc} */
245        @Override
246        protected void createCaseRule() {
247                super.createCaseRule();
248
249                // Java also allows fully/partially qualified constants
250                inState(IN_METHOD).markStart().sequence(CASE).repeated(IDENTIFIER, DOT).sequence(IDENTIFIER, COLON)
251                                .createNode(EShallowEntityType.META, 0, new Region(1, -2)).endNode();
252        }
253
254        /** {@inheritDoc} */
255        @Override
256        protected RecognizerBase<EGenericParserStates> typePattern(RecognizerBase<EGenericParserStates> currentState) {
257                EnumSet<ETokenType> modifierKeywords = EnumSet.of(STATIC, FINAL, PRIVATE, PROTECTED, PUBLIC, ABSTRACT, NATIVE,
258                                SYNCHRONIZED, TRANSIENT, VOLATILE, DEFAULT);
259                EnumSet<ETokenType> typeNames = EnumSet.of(IDENTIFIER, VOID, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, CHAR,
260                                BOOLEAN);
261
262                RecognizerBase<EGenericParserStates> annotationSubRecognizer = createRecognizer(
263                                start -> start.sequence(AT_OPERATOR, IDENTIFIER).repeated(DOT, IDENTIFIER));
264
265                return currentState.repeated(modifierKeywords).skipNested(LT, GT).repeatedSubRecognizer(annotationSubRecognizer)
266                                .repeated(IDENTIFIER, DOT).optional(THIS, DOT).sequence(typeNames).skipNested(LT, GT)
267                                .repeatedSubRecognizer(annotationSubRecognizer).skipAny(EnumSet.of(LBRACK, RBRACK));
268        }
269
270        /** {@inheritDoc} */
271        @Override
272        protected void createSubExpressionRules() {
273                inState(IN_EXPRESSION).sequence(NEW).markStart().repeated(IDENTIFIER, DOT).sequence(IDENTIFIER)
274                                .createNode(EShallowEntityType.TYPE, SubTypeNames.ANONYMOUS_CLASS, new Region(0, -1)).skipNested(LT, GT)
275                                .skipNested(LPAREN, RPAREN, getSubExpressionRecognizer()).sequence(LBRACE).parseUntil(IN_TYPE)
276                                .sequence(RBRACE).endNode();
277
278                createLambdaWithArrowRules(ARROW);
279        }
280
281        /** {@inheritDoc} */
282        @Override
283        protected RecognizerBase<EGenericParserStates> getSubExpressionRecognizer() {
284                return new JavaAnonymousClassAndLambdaRecognizer();
285        }
286}