001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.groovy;
018
019import static eu.cqse.check.framework.scanner.ETokenType.ABSTRACT;
020import static eu.cqse.check.framework.scanner.ETokenType.ANNOTATION_INTERFACE;
021import static eu.cqse.check.framework.scanner.ETokenType.ARROW;
022import static eu.cqse.check.framework.scanner.ETokenType.AS;
023import static eu.cqse.check.framework.scanner.ETokenType.ASSERT;
024import static eu.cqse.check.framework.scanner.ETokenType.AT_OPERATOR;
025import static eu.cqse.check.framework.scanner.ETokenType.BOOLEAN;
026import static eu.cqse.check.framework.scanner.ETokenType.BOOLEAN_LITERAL;
027import static eu.cqse.check.framework.scanner.ETokenType.BREAK;
028import static eu.cqse.check.framework.scanner.ETokenType.CASE;
029import static eu.cqse.check.framework.scanner.ETokenType.CATCH;
030import static eu.cqse.check.framework.scanner.ETokenType.CHAR;
031import static eu.cqse.check.framework.scanner.ETokenType.CHARACTER_LITERAL;
032import static eu.cqse.check.framework.scanner.ETokenType.CLASS;
033import static eu.cqse.check.framework.scanner.ETokenType.COLON;
034import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
035import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE;
036import static eu.cqse.check.framework.scanner.ETokenType.DEF;
037import static eu.cqse.check.framework.scanner.ETokenType.DEFAULT;
038import static eu.cqse.check.framework.scanner.ETokenType.DOT;
039import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE;
040import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
041import static eu.cqse.check.framework.scanner.ETokenType.ENUM;
042import static eu.cqse.check.framework.scanner.ETokenType.FINAL;
043import static eu.cqse.check.framework.scanner.ETokenType.FINALLY;
044import static eu.cqse.check.framework.scanner.ETokenType.FLOAT;
045import static eu.cqse.check.framework.scanner.ETokenType.FLOATING_POINT_LITERAL;
046import static eu.cqse.check.framework.scanner.ETokenType.FOR;
047import static eu.cqse.check.framework.scanner.ETokenType.GOTO;
048import static eu.cqse.check.framework.scanner.ETokenType.GT;
049import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
050import static eu.cqse.check.framework.scanner.ETokenType.IF;
051import static eu.cqse.check.framework.scanner.ETokenType.IMPORT;
052import static eu.cqse.check.framework.scanner.ETokenType.INT;
053import static eu.cqse.check.framework.scanner.ETokenType.INTEGER_LITERAL;
054import static eu.cqse.check.framework.scanner.ETokenType.INTERFACE;
055import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
056import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
057import static eu.cqse.check.framework.scanner.ETokenType.LONG;
058import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
059import static eu.cqse.check.framework.scanner.ETokenType.LT;
060import static eu.cqse.check.framework.scanner.ETokenType.MINUS;
061import static eu.cqse.check.framework.scanner.ETokenType.MINUSMINUS;
062import static eu.cqse.check.framework.scanner.ETokenType.MULT;
063import static eu.cqse.check.framework.scanner.ETokenType.NATIVE;
064import static eu.cqse.check.framework.scanner.ETokenType.NEW;
065import static eu.cqse.check.framework.scanner.ETokenType.NULL_LITERAL;
066import static eu.cqse.check.framework.scanner.ETokenType.PACKAGE;
067import static eu.cqse.check.framework.scanner.ETokenType.PLUSPLUS;
068import static eu.cqse.check.framework.scanner.ETokenType.PRIVATE;
069import static eu.cqse.check.framework.scanner.ETokenType.PROTECTED;
070import static eu.cqse.check.framework.scanner.ETokenType.PUBLIC;
071import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
072import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
073import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
074import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
075import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
076import static eu.cqse.check.framework.scanner.ETokenType.SHORT;
077import static eu.cqse.check.framework.scanner.ETokenType.STATIC;
078import static eu.cqse.check.framework.scanner.ETokenType.STRICTFP;
079import static eu.cqse.check.framework.scanner.ETokenType.STRING_LITERAL;
080import static eu.cqse.check.framework.scanner.ETokenType.SUPER;
081import static eu.cqse.check.framework.scanner.ETokenType.SWITCH;
082import static eu.cqse.check.framework.scanner.ETokenType.SYNCHRONIZED;
083import static eu.cqse.check.framework.scanner.ETokenType.THIS;
084import static eu.cqse.check.framework.scanner.ETokenType.THROW;
085import static eu.cqse.check.framework.scanner.ETokenType.THROWS;
086import static eu.cqse.check.framework.scanner.ETokenType.TRAIT;
087import static eu.cqse.check.framework.scanner.ETokenType.TRANSIENT;
088import static eu.cqse.check.framework.scanner.ETokenType.TRY;
089import static eu.cqse.check.framework.scanner.ETokenType.VOID;
090import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
091import static eu.cqse.check.framework.shallowparser.languages.groovy.EGroovyShallowParserStates.IN_ENUM_TYPE;
092import static eu.cqse.check.framework.shallowparser.languages.groovy.EGroovyShallowParserStates.IN_EXPRESSION;
093import static eu.cqse.check.framework.shallowparser.languages.groovy.EGroovyShallowParserStates.IN_METHOD;
094import static eu.cqse.check.framework.shallowparser.languages.groovy.EGroovyShallowParserStates.IN_TYPE;
095import static eu.cqse.check.framework.shallowparser.languages.groovy.EGroovyShallowParserStates.TOP_LEVEL;
096
097import java.util.Arrays;
098import java.util.EnumSet;
099
100import org.conqat.lib.commons.region.Region;
101
102import eu.cqse.check.framework.scanner.ETokenType;
103import eu.cqse.check.framework.shallowparser.SubTypeNames;
104import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
105import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
106import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
107
108/**
109 * A Groovy shallow parser following the language specifications found at:
110 * <a href="http://www.groovy-lang.org/documentation.html">http://www.
111 * groovy-lang.org/documentation.html</a>. <br>
112 * The groovy repository can be found at:
113 * <a href="https://github.com/apache/groovy/">https://github.com/apache/groovy/
114 * </a>.
115 * 
116 * <br>
117 * A few notes:
118 * <ul>
119 * <li>This parser recognizes all types (classes, enums, interfaces,
120 * annotations, traits), methods, attributes, local variables and control
121 * structures (if, try, switch, loops).</li>
122 * <li>The nodes for import, package, annotations, case, default and labels are
123 * reported as meta entities.</li>
124 * <li>Closures are reported as anonymous functions.</li>
125 * <li>Closures are parsed as well, as they are very versatile on Groovy.</li>
126 * <li>Anonymous inner classes and closures are also detected within expressions
127 * and arrays.</li>
128 * </ul>
129 * 
130 * <br>
131 * Known Issues:
132 * <ul>
133 * <li>Groovy allows optional parenthesis for method calls. This means that
134 * constructs like 'println a' (which normally stands for 'println(a)') cannot
135 * be distinguished from local variables. This parser creates local variables in
136 * these cases.</li>
137 * <li>Constructors and methods can not be reliably distinguished. E.g.
138 * ACCESS_MODIFIER IDENTIFIER () {...} is only a constructor if the identifier
139 * has the same name as the type. Therefore we prefer matching methods in
140 * ambiguous cases.</li>
141 * </ul>
142 */
143public class GroovyShallowParser extends ShallowParserBase<EGroovyShallowParserStates> {
144
145        /** All primitive data types. */
146        private static final EnumSet<ETokenType> PRIMITIVE_DATATYPES = EnumSet.of(BOOLEAN, CHAR, SHORT, INT, LONG, FLOAT,
147                        DOUBLE);
148
149        /** All literal types. */
150        private static final EnumSet<ETokenType> LITERALS = EnumSet.of(BOOLEAN_LITERAL, INTEGER_LITERAL,
151                        FLOATING_POINT_LITERAL, NULL_LITERAL, STRING_LITERAL, CHARACTER_LITERAL);
152
153        /** All access modifiers. */
154        private static final EnumSet<ETokenType> ACCESS_MODIFIERS = EnumSet.of(PRIVATE, PROTECTED, PUBLIC);
155
156        /** All type modifiers. */
157        private static final EnumSet<ETokenType> TYPE_MODIFIERS = EnumSet.of(ABSTRACT, STATIC, FINAL, STRICTFP);
158
159        /** All method modifiers. */
160        private static final EnumSet<ETokenType> METHOD_MODIFIERS = EnumSet.of(ABSTRACT, STATIC, FINAL, NATIVE,
161                        SYNCHRONIZED, TRANSIENT, STRICTFP);
162
163        static {
164                TYPE_MODIFIERS.addAll(ACCESS_MODIFIERS);
165                METHOD_MODIFIERS.addAll(ACCESS_MODIFIERS);
166        }
167
168        /** Constructor. */
169        public GroovyShallowParser() {
170                super(EGroovyShallowParserStates.class, TOP_LEVEL);
171                createMetaRules();
172                createInitializerRules();
173                createTypeRules();
174                createFieldRules();
175                createEnumLiteralRule();
176                createMethodDefinitionRules();
177                createConstructorDefinitionRule();
178                createInExpressionRules();
179                createLoopRules();
180                createSwitchCaseRules();
181                createContinuationRules();
182                createStatementRule();
183                createEmptyStatementRule();
184        }
185
186        /** Creates various rules that create META shallow entities. */
187        private void createMetaRules() {
188                createPackageRule();
189                createImportRule();
190                createAnnotationRule();
191                createLabelRule();
192        }
193
194        /** Creates rule for the package line. */
195        private void createPackageRule() {
196                inState(TOP_LEVEL).sequence(PACKAGE).markStart().sequence(IDENTIFIER)
197                                .repeated(DOT, EnumSet.of(IDENTIFIER, MULT))
198                                .createNode(EShallowEntityType.META, SubTypeNames.PACKAGE, new Region(0, -1)).optional(SEMICOLON)
199                                .endNode();
200        }
201
202        /** Creates rule for imports. */
203        private void createImportRule() {
204                inState(TOP_LEVEL).sequence(IMPORT).optional(STATIC).markStart().sequence(IDENTIFIER)
205                                .repeated(DOT, EnumSet.of(IDENTIFIER, MULT))
206                                .createNode(EShallowEntityType.META, SubTypeNames.IMPORT, new Region(0, -1)).optional(AS, IDENTIFIER)
207                                .optional(SEMICOLON).endNode();
208        }
209
210        /** Creates a rule for annotations. */
211        private void createAnnotationRule() {
212                inAnyState().sequence(AT_OPERATOR, IDENTIFIER).repeated(DOT, IDENTIFIER)
213                                .createNode(EShallowEntityType.META, SubTypeNames.ANNOTATION, new Region(1, -1))
214                                .skipNested(LPAREN, RPAREN).endNode();
215        }
216
217        /** Creates a rule for labels. */
218        private void createLabelRule() {
219                inAnyState().sequence(IDENTIFIER, COLON).createNode(EShallowEntityType.META, SubTypeNames.LABEL, -2).endNode();
220        }
221
222        /**
223         * Creates rules that recognize static and non-static initializer blocks.
224         */
225        private void createInitializerRules() {
226                inState(IN_TYPE, IN_ENUM_TYPE).sequence(STATIC).sequence(LBRACE)
227                                .createNode(EShallowEntityType.METHOD, "static initializer", "<sinit>").parseUntil(IN_METHOD)
228                                .sequence(RBRACE).endNode();
229
230                inState(IN_TYPE, IN_ENUM_TYPE).sequence(LBRACE)
231                                .createNode(EShallowEntityType.METHOD, "non-static initializer", "<init>").parseUntil(IN_METHOD)
232                                .sequence(RBRACE).endNode();
233        }
234
235        /**
236         * Creates rules for all types. These are CLASS, INTERFACE,
237         * ANNOTATION_INTERFACE, TRAIT and ENUM.
238         */
239        private void createTypeRules() {
240                createTypeRule(CLASS, SubTypeNames.CLASS, IN_TYPE);
241                createTypeRule(INTERFACE, SubTypeNames.INTERFACE, IN_TYPE);
242                createTypeRule(ANNOTATION_INTERFACE, SubTypeNames.ANNOTATION, IN_TYPE);
243                createTypeRule(TRAIT, SubTypeNames.TRAIT, IN_TYPE);
244                createTypeRule(ENUM, SubTypeNames.ENUM, IN_ENUM_TYPE);
245        }
246
247        /** Creates rule for one given type. */
248        private void createTypeRule(ETokenType type, String subTypeName, EGroovyShallowParserStates subState) {
249                inState(TOP_LEVEL, IN_TYPE, IN_ENUM_TYPE).repeated(TYPE_MODIFIERS).sequence(type).sequence(IDENTIFIER)
250                                .createNode(EShallowEntityType.TYPE, subTypeName, -1).skipTo(LBRACE).parseUntil(subState)
251                                .sequence(RBRACE).endNode();
252        }
253
254        /** Creates rules for enum literals. */
255        private void createEnumLiteralRule() {
256                RecognizerBase<EGroovyShallowParserStates> enumBase = inState(IN_ENUM_TYPE)
257                                .preCondition(new GroovyPrecedingEnumLiteralRecognizer()).markStart().sequence(IDENTIFIER)
258                                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ENUM_LITERAL, 0)
259                                .skipNested(LPAREN, RPAREN, createSubExpressionRecognizer());
260
261                // A(...){<IN_TYPE>}, B{<IN_TYPE>}
262                enumBase.sequence(LBRACE).parseUntil(IN_TYPE).sequence(RBRACE).optional(EnumSet.of(COMMA, SEMICOLON)).endNode();
263
264                // A(...),B,C
265                enumBase.optional(EnumSet.of(COMMA, SEMICOLON)).endNode();
266
267        }
268
269        /** Creates rules for methods. */
270        private void createMethodDefinitionRules() {
271                createMethodDefinitionRulesInState(TOP_LEVEL);
272                createMethodDefinitionRulesInState(IN_TYPE);
273                createMethodDefinitionRulesInState(IN_ENUM_TYPE);
274        }
275
276        /** Creates rules for methods in a specific state. */
277        private void createMethodDefinitionRulesInState(EGroovyShallowParserStates state) {
278
279                EnumSet<ETokenType> returnTypes = EnumSet.of(VOID, DEF, IDENTIFIER);
280                returnTypes.addAll(PRIMITIVE_DATATYPES);
281
282                // Methods with at least one modifier and return types.
283                RecognizerBase<EGroovyShallowParserStates> baseWithModifier = inState(state).sequence(METHOD_MODIFIERS)
284                                .repeated(METHOD_MODIFIERS).skipNested(LT, GT).sequence(returnTypes).repeated(DOT, returnTypes)
285                                .skipNested(LT, GT).skipAny(EnumSet.of(LBRACK, RBRACK));
286
287                endMethodDefinitionRule(baseWithModifier, state);
288
289                // Methods with at least one return type.
290                RecognizerBase<EGroovyShallowParserStates> baseWithReturnType = inState(state).repeated(METHOD_MODIFIERS)
291                                .skipNested(LT, GT).sequence(returnTypes).repeated(DOT, returnTypes).skipNested(LT, GT)
292                                .skipAny(EnumSet.of(LBRACK, RBRACK));
293
294                endMethodDefinitionRule(baseWithReturnType, state);
295
296                // Methods guaranteed without return type. Need at least one modifier.
297                RecognizerBase<EGroovyShallowParserStates> baseWithoutReturnType = inState(state).sequence(METHOD_MODIFIERS)
298                                .repeated(METHOD_MODIFIERS).skipNested(LT, GT);
299
300                endMethodDefinitionRule(baseWithoutReturnType, state);
301        }
302
303        /**
304         * Ends a method definition rule. i.e. matches every method beginning with
305         * its identifier and creates the shallow entity.
306         */
307        private static void endMethodDefinitionRule(RecognizerBase<EGroovyShallowParserStates> base,
308                        EGroovyShallowParserStates state) {
309
310                // Matches 'methodIdentifier(...)'.
311                RecognizerBase<EGroovyShallowParserStates> methodDef = base.markStart().sequence(IDENTIFIER).sequence(LPAREN)
312                                .skipToWithNesting(RPAREN, LPAREN, RPAREN);
313
314                // Matches 'throws java.io.IOException, java.io.IOError'.
315                RecognizerBase<EGroovyShallowParserStates> throwsMethodDef = methodDef.sequence(THROWS).sequence(IDENTIFIER)
316                                .repeated(EnumSet.of(COMMA, DOT), IDENTIFIER)
317                                .createNode(EShallowEntityType.METHOD, SubTypeNames.METHOD, 0);
318
319                // Non abstract methods continue with '{...}'
320                throwsMethodDef.sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE).endNode();
321
322                // Non abstract methods continue with '{...}'
323                methodDef.sequence(LBRACE).createNode(EShallowEntityType.METHOD, SubTypeNames.METHOD, 0).parseUntil(IN_METHOD)
324                                .sequence(RBRACE).endNode();
325
326                // Abstract methods are methods without '{...}'.
327                // They can only appear in types.
328                if (state == EGroovyShallowParserStates.IN_TYPE) {
329                        methodDef.createNode(EShallowEntityType.METHOD, SubTypeNames.METHOD, 0).optional(SEMICOLON).endNode();
330
331                        throwsMethodDef.optional(SEMICOLON).endNode();
332                }
333        }
334
335        /**
336         * Creates rule for constructors that matches everything that is left after
337         * the method definition rules applied. Therefore we do not check for access
338         * modifiers (It would be recognized as method anyway).
339         */
340        private void createConstructorDefinitionRule() {
341                inState(IN_TYPE, IN_ENUM_TYPE).markStart().sequence(IDENTIFIER).sequence(LPAREN)
342                                .createNode(EShallowEntityType.METHOD, SubTypeNames.CONSTRUCTOR, 0).skipTo(LBRACE).parseUntil(IN_METHOD)
343                                .sequence(RBRACE).endNode();
344        }
345
346        /** Create rules for 'for' and 'while' loops. */
347        private void createLoopRules() {
348                RecognizerBase<EGroovyShallowParserStates> loopBase = inState(IN_METHOD, TOP_LEVEL).markStart()
349                                .sequence(EnumSet.of(FOR, WHILE)).createNode(EShallowEntityType.STATEMENT, 0)
350                                .skipNested(LPAREN, RPAREN, createSubExpressionRecognizer());
351
352                // Loop body within braces.
353                loopBase.sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE).endNode();
354
355                // Single statement loop body.
356                loopBase.parseOnce(IN_METHOD).endNode();
357
358                // There is no do-while in groovy
359        }
360
361        /** Create switch case rules. */
362        private void createSwitchCaseRules() {
363                inState(TOP_LEVEL, IN_METHOD).sequence(SWITCH).createNode(EShallowEntityType.STATEMENT, SubTypeNames.SWITCH)
364                                .skipNested(LPAREN, RPAREN, createSubExpressionRecognizer()).skipTo(LBRACE).parseUntil(IN_METHOD)
365                                .sequence(RBRACE).endNode();
366
367                // Matches 'case { it < 0 }:'.
368                inState(IN_METHOD).sequence(CASE).sequenceBefore(LBRACE).createNode(EShallowEntityType.META, SubTypeNames.CASE)
369                                .parseOnce(IN_EXPRESSION).sequence(COLON).endNode();
370
371                // Matches 'case [4, 5, 6, 'inList']:' and 'case ... :'.
372                inState(IN_METHOD).sequence(CASE).createNode(EShallowEntityType.META, SubTypeNames.CASE)
373                                .skipToWithNesting(COLON, LBRACK, RBRACK).endNode();
374
375                inState(IN_METHOD).sequence(DEFAULT).createNode(EShallowEntityType.META, SubTypeNames.DEFAULT).skipTo(COLON)
376                                .endNode();
377        }
378
379        /**
380         * Creates rules for statements with continuation like 'if-elseif-else'
381         * statements and 'try-catch-finally'.
382         */
383        private void createContinuationRules() {
384                createRuleWithContinuation(IF, ELSE, ELSE, IF);
385                createRuleWithContinuation(TRY, FINALLY, CATCH);
386        }
387
388        /** Creates rules for statements with continuation. */
389        private void createRuleWithContinuation(ETokenType first, ETokenType last, ETokenType... middle) {
390                RecognizerBase<EGroovyShallowParserStates> baseMid = inState(TOP_LEVEL, IN_METHOD).sequence((Object[]) middle);
391
392                RecognizerBase<EGroovyShallowParserStates> baseFirstLast = inState(TOP_LEVEL, IN_METHOD)
393                                .sequence(EnumSet.of(first, last));
394
395                EnumSet<ETokenType> continuationTokens = EnumSet.of(last);
396                continuationTokens.addAll(Arrays.asList(middle));
397
398                endContinuationNode(baseMid, continuationTokens);
399                endContinuationNode(baseFirstLast, continuationTokens);
400        }
401
402        /**
403         * Creates the node, skips the parenthesis, appends either a single
404         * statement or a block of statements and ends the node with a possible
405         * continuation.
406         */
407        private void endContinuationNode(RecognizerBase<EGroovyShallowParserStates> baseRecognizer,
408                        EnumSet<ETokenType> continuationTokens) {
409                baseRecognizer = baseRecognizer.createNode(EShallowEntityType.STATEMENT, new Region(0, -1)).skipNested(LPAREN,
410                                RPAREN, createSubExpressionRecognizer());
411
412                RecognizerBase<EGroovyShallowParserStates> baseMultipleStatement = baseRecognizer.sequence(LBRACE)
413                                .parseUntil(IN_METHOD).sequence(RBRACE);
414                RecognizerBase<EGroovyShallowParserStates> baseSingleStatement = baseRecognizer.parseOnce(IN_METHOD);
415
416                endWithPossibleContinuation(baseMultipleStatement, continuationTokens);
417                endWithPossibleContinuation(baseSingleStatement, continuationTokens);
418        }
419
420        /**
421         * Create rule for fields and attributes. This unfortunately also detects
422         * constructs like 'println a' (== 'println(a)') as fields, because the
423         * parenthesis in method calls are optional.
424         */
425        private void createFieldRules() {
426                createFieldRules(EShallowEntityType.ATTRIBUTE, SubTypeNames.ATTRIBUTE, IN_TYPE, IN_ENUM_TYPE);
427                createFieldRules(EShallowEntityType.STATEMENT, SubTypeNames.LOCAL_VARIABLE, TOP_LEVEL, IN_METHOD);
428
429        }
430
431        /** Creates rules for a specific type of fields. */
432        private void createFieldRules(EShallowEntityType shallowEntityType, String subtypename,
433                        EGroovyShallowParserStates... states) {
434
435                // Fields with at least one datatype.
436                RecognizerBase<EGroovyShallowParserStates> withDatatype = fieldWithDataType(states)
437                                .preCondition(createFieldRecognizer()).createNode(shallowEntityType, subtypename, -1);
438
439                // Fields with at least one modifier.
440                RecognizerBase<EGroovyShallowParserStates> withModifier = inState(states).sequence(TYPE_MODIFIERS)
441                                .repeated(TYPE_MODIFIERS).sequence(IDENTIFIER).preCondition(createFieldRecognizer())
442                                .createNode(shallowEntityType, subtypename, -1);
443
444                // Multiple assignment fields. E.g. 'def (int i, String j) = [1, 'f']'
445                RecognizerBase<EGroovyShallowParserStates> multipleAssignment = inState(states).sequence(DEF)
446                                .skipNested(LPAREN, RPAREN).preCondition(createFieldRecognizer())
447                                .createNode(shallowEntityType, subtypename, new Region(1, -1));
448
449                skipToEndOfStatement(withDatatype);
450                skipToEndOfStatement(withModifier);
451                skipToEndOfStatement(multipleAssignment);
452
453        }
454
455        /**
456         * Creates a recognizer for starting tokens of fields that contain at least
457         * one datatype.
458         */
459        private RecognizerBase<EGroovyShallowParserStates> fieldWithDataType(EGroovyShallowParserStates... states) {
460                EnumSet<ETokenType> datatypes = EnumSet.of(IDENTIFIER, DEF, THIS);
461                datatypes.addAll(PRIMITIVE_DATATYPES);
462
463                return inState(states).repeated(TYPE_MODIFIERS).sequence(datatypes).repeated(DOT, datatypes).skipNested(LT, GT)
464                                .sequence(IDENTIFIER);
465        }
466
467        /**
468         * Creates a recognizer that detects, if we currently parse a field (or if
469         * we are mistakenly in a simple statement). We cannot solve this with the
470         * common recognizers, as we have to check for new lines.
471         */
472        private static RecognizerBase<EGroovyShallowParserStates> createFieldRecognizer() {
473                return new GroovyFieldRecognizer();
474        }
475
476        /** Creates rule for simple statements. */
477        private void createStatementRule() {
478                EnumSet<ETokenType> statementStartTokens = EnumSet.of(IDENTIFIER, NEW, RETURN, THIS, CONTINUE, BREAK, GOTO,
479                                THROW, SUPER, ASSERT, MINUS, PLUSPLUS, MINUSMINUS);
480                statementStartTokens.addAll(PRIMITIVE_DATATYPES);
481                statementStartTokens.addAll(LITERALS);
482
483                RecognizerBase<EGroovyShallowParserStates> base = inState(IN_METHOD, TOP_LEVEL)
484                                .sequenceBefore(statementStartTokens)
485                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.SIMPLE_STATEMENT, 0);
486
487                skipToEndOfStatement(base, true);
488
489                // A statement may start with LPAREN for multiple assignments.
490                RecognizerBase<EGroovyShallowParserStates> inParenthesis = inState(IN_METHOD, TOP_LEVEL).sequenceBefore(LPAREN)
491                                .skipNested(LPAREN, RPAREN)
492                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.SIMPLE_STATEMENT, new Region(0, -1));
493                skipToEndOfStatement(inParenthesis);
494
495                // A statement may start with a LBRACK. Especially as one can omit the
496                // return keyword.
497                RecognizerBase<EGroovyShallowParserStates> inBrackets = inState(IN_METHOD, TOP_LEVEL).sequenceBefore(LBRACK)
498                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.SIMPLE_STATEMENT, 0)
499                                .skipNested(LBRACK, RBRACK, createSubExpressionRecognizer());
500
501                skipToEndOfStatement(inBrackets);
502        }
503
504        /** Creates a rule to recognize single semicolons as empty statements. */
505        private void createEmptyStatementRule() {
506                inAnyState().sequence(SEMICOLON).createNode(EShallowEntityType.STATEMENT, SubTypeNames.EMPTY_STATEMENT)
507                                .endNode();
508
509        }
510
511        /**
512         * Appends the {@link GroovySkipToEndOfStatementRecognizer} to the given
513         * {@link RecognizerBase} and closes the node.
514         */
515        private static void skipToEndOfStatement(RecognizerBase<EGroovyShallowParserStates> base) {
516                skipToEndOfStatement(base, false);
517        }
518
519        /**
520         * Appends the {@link GroovySkipToEndOfStatementRecognizer} to the given
521         * {@link RecognizerBase} and closes the node. Additionally set flag if at
522         * least one match should be forced.
523         */
524        private static void skipToEndOfStatement(RecognizerBase<EGroovyShallowParserStates> base, boolean forceMatch) {
525                GroovySkipToEndOfStatementRecognizer recognizer = new GroovySkipToEndOfStatementRecognizer();
526                recognizer.setForceMatch(forceMatch);
527                base.subRecognizer(recognizer, 0, 1).endNode();
528        }
529
530        /** Creates the rule for anonymous inner classes and closures. */
531        private void createInExpressionRules() {
532
533                // Anonymous inner classes
534                inState(IN_EXPRESSION).sequence(NEW, IDENTIFIER).repeated(DOT, IDENTIFIER).skipNested(LT, GT)
535                                .createNode(EShallowEntityType.TYPE, SubTypeNames.ANONYMOUS_CLASS, 1)
536                                .skipNested(LPAREN, RPAREN, createSubExpressionRecognizer()).sequence(LBRACE).parseUntil(IN_TYPE)
537                                .sequence(RBRACE).endNode();
538
539                // Closures
540                RecognizerBase<EGroovyShallowParserStates> closureBase = inState(IN_EXPRESSION).sequence(LBRACE)
541                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.ANONYMOUS_FUNCTION);
542
543                closureBase.skipBefore(EnumSet.of(ARROW, LBRACE)).sequence(ARROW).parseUntil(IN_METHOD).sequence(RBRACE)
544                                .endNode(); // with arrow
545
546                closureBase.parseUntil(IN_METHOD).sequence(RBRACE).endNode(); // without
547                                                                                                                                                // arrow
548        }
549
550        /**
551         * Creates a recognizer, that matches anonymous classes and closures within
552         * expressions.
553         */
554        private static RecognizerBase<EGroovyShallowParserStates> createSubExpressionRecognizer() {
555                return new GroovySubExpressionRecognizer();
556        }
557}