001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.cpp;
018
019import static eu.cqse.check.framework.scanner.ETokenType.ALIGNAS;
020import static eu.cqse.check.framework.scanner.ETokenType.AND;
021import static eu.cqse.check.framework.scanner.ETokenType.ANDAND;
022import static eu.cqse.check.framework.scanner.ETokenType.ASSERT;
023import static eu.cqse.check.framework.scanner.ETokenType.AUTO;
024import static eu.cqse.check.framework.scanner.ETokenType.BOOL;
025import static eu.cqse.check.framework.scanner.ETokenType.BREAK;
026import static eu.cqse.check.framework.scanner.ETokenType.BYTE;
027import static eu.cqse.check.framework.scanner.ETokenType.CASE;
028import static eu.cqse.check.framework.scanner.ETokenType.CHAR;
029import static eu.cqse.check.framework.scanner.ETokenType.CLASS;
030import static eu.cqse.check.framework.scanner.ETokenType.COLON;
031import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
032import static eu.cqse.check.framework.scanner.ETokenType.CONST;
033import static eu.cqse.check.framework.scanner.ETokenType.CONSTEXPR;
034import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE;
035import static eu.cqse.check.framework.scanner.ETokenType.DECLTYPE;
036import static eu.cqse.check.framework.scanner.ETokenType.DELETE;
037import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE;
038import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
039import static eu.cqse.check.framework.scanner.ETokenType.ENUM;
040import static eu.cqse.check.framework.scanner.ETokenType.EQ;
041import static eu.cqse.check.framework.scanner.ETokenType.EXPLICIT;
042import static eu.cqse.check.framework.scanner.ETokenType.EXTERN;
043import static eu.cqse.check.framework.scanner.ETokenType.FAR;
044import static eu.cqse.check.framework.scanner.ETokenType.FINAL;
045import static eu.cqse.check.framework.scanner.ETokenType.FLOAT;
046import static eu.cqse.check.framework.scanner.ETokenType.FOR;
047import static eu.cqse.check.framework.scanner.ETokenType.FRIEND;
048import static eu.cqse.check.framework.scanner.ETokenType.GOTO;
049import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
050import static eu.cqse.check.framework.scanner.ETokenType.INLINE;
051import static eu.cqse.check.framework.scanner.ETokenType.INT;
052import static eu.cqse.check.framework.scanner.ETokenType.INTERFACE_CLASS;
053import static eu.cqse.check.framework.scanner.ETokenType.INTERFACE_STRUCT;
054import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
055import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
056import static eu.cqse.check.framework.scanner.ETokenType.LONG;
057import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
058import static eu.cqse.check.framework.scanner.ETokenType.MINUSMINUS;
059import static eu.cqse.check.framework.scanner.ETokenType.MULT;
060import static eu.cqse.check.framework.scanner.ETokenType.MUTABLE;
061import static eu.cqse.check.framework.scanner.ETokenType.NAMESPACE;
062import static eu.cqse.check.framework.scanner.ETokenType.NEAR;
063import static eu.cqse.check.framework.scanner.ETokenType.NEW;
064import static eu.cqse.check.framework.scanner.ETokenType.NOEXCEPT;
065import static eu.cqse.check.framework.scanner.ETokenType.NORETURN;
066import static eu.cqse.check.framework.scanner.ETokenType.OPERATOR;
067import static eu.cqse.check.framework.scanner.ETokenType.PLUSPLUS;
068import static eu.cqse.check.framework.scanner.ETokenType.POINTERTO;
069import static eu.cqse.check.framework.scanner.ETokenType.PREPROCESSOR_DIRECTIVE;
070import static eu.cqse.check.framework.scanner.ETokenType.PRIVATE;
071import static eu.cqse.check.framework.scanner.ETokenType.PUBLIC;
072import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
073import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
074import static eu.cqse.check.framework.scanner.ETokenType.REGISTER;
075import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
076import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
077import static eu.cqse.check.framework.scanner.ETokenType.SCOPE;
078import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
079import static eu.cqse.check.framework.scanner.ETokenType.SHORT;
080import static eu.cqse.check.framework.scanner.ETokenType.SIGNED;
081import static eu.cqse.check.framework.scanner.ETokenType.STATIC;
082import static eu.cqse.check.framework.scanner.ETokenType.STRUCT;
083import static eu.cqse.check.framework.scanner.ETokenType.SUPER;
084import static eu.cqse.check.framework.scanner.ETokenType.SWITCH;
085import static eu.cqse.check.framework.scanner.ETokenType.THIS;
086import static eu.cqse.check.framework.scanner.ETokenType.THROW;
087import static eu.cqse.check.framework.scanner.ETokenType.TYPENAME;
088import static eu.cqse.check.framework.scanner.ETokenType.UNION;
089import static eu.cqse.check.framework.scanner.ETokenType.UNSIGNED;
090import static eu.cqse.check.framework.scanner.ETokenType.VIRTUAL;
091import static eu.cqse.check.framework.scanner.ETokenType.VOID;
092import static eu.cqse.check.framework.scanner.ETokenType.WCHAR_T;
093import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
094import static eu.cqse.check.framework.scanner.ETokenType.XOR;
095import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_ENUM;
096import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_EXPRESSION;
097import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_METHOD;
098import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_MODULE;
099import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_TYPE;
100import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.TOP_LEVEL;
101
102import java.util.ArrayList;
103import java.util.Arrays;
104import java.util.EnumSet;
105import java.util.HashSet;
106import java.util.List;
107import java.util.Set;
108
109import org.conqat.lib.commons.collections.CollectionUtils;
110import org.conqat.lib.commons.region.Region;
111
112import eu.cqse.check.framework.scanner.ETokenType;
113import eu.cqse.check.framework.scanner.IToken;
114import eu.cqse.check.framework.shallowparser.SubTypeNames;
115import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
116import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
117import eu.cqse.check.framework.shallowparser.languages.base.CStyleShallowParserBase;
118import eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates;
119
120/**
121 * Shallow parser for C/C++14.
122 * <p>
123 * What this parser does and does not:
124 * <ul>
125 * <li>The parser recognizes types (classes, enums, interfaces), methods and
126 * attributes, individual statements and lambdas.</li>
127 * <li>It recognizes the nesting of statements (e.g. in loops), but does not
128 * parse into the statements. For example, it recognizes an if-statement and
129 * provides the list of sub-statements, but does not provide direct access to
130 * the if-condition.</li>
131 * <li>All preprocessor statements are parsed as meta information.</li>
132 * <li>Template declarations are parsed as preceding meta information.</li>
133 * <li>Forward declarations are handled as meta information.</li>
134 * <li>We heuristically filter code generating macros, such as
135 * "CREATE_STUFF(MyClass)".</li>
136 * </ul>
137 */
138public class CppShallowParser extends CStyleShallowParserBase {
139
140        /** Basic keywords used for primitive types in a C++ like language. */
141        protected static final EnumSet<ETokenType> PRIMITIVE_TYPES = EnumSet.of(VOID, BYTE, INT, FLOAT, DOUBLE, CHAR, BOOL,
142                        WCHAR_T);
143
144        /**
145         * Modifiers that can be used in front of the other primitive types. Note that
146         * in C you can write both "short int" and "short", so the "int" is assumed
147         * implicitly. Still, "short" is considered a modifier here, which is different
148         * from e.g. Java, where short would be a primitive on its own.
149         */
150        private static final EnumSet<ETokenType> PRIMITIVE_MODIFIERS = EnumSet.of(LONG, SHORT, SIGNED, UNSIGNED);
151
152        /** Token types that are used for type declarations. */
153        private static final EnumSet<ETokenType> TYPE_KEYWORDS = EnumSet.of(CLASS, STRUCT, UNION, ENUM);
154
155        /** Modifiers that can be specified after a lambda. */
156        private static final EnumSet<ETokenType> LAMBDA_MODIFIERS = EnumSet.of(CONSTEXPR, MUTABLE);
157
158        /** Base token types for types or identifiers (without primitive types). */
159        protected static final EnumSet<ETokenType> BASE_TYPE_OR_IDENTIFIER = EnumSet.of(AUTO, DECLTYPE, IDENTIFIER);
160
161        /** Token types for types or identifiers. */
162        private static final EnumSet<ETokenType> TYPE_OR_IDENTIFIER = CollectionUtils.enumUnionSet(BASE_TYPE_OR_IDENTIFIER,
163                        PRIMITIVE_TYPES);
164
165        /**
166         * Token types that are modifiers in front of a type declaration or method.
167         */
168        protected static final EnumSet<ETokenType> METHOD_AND_TYPE_DECLARATION_MODIFIERS = EnumSet.of(CONST, CONSTEXPR,
169                        STATIC, VIRTUAL, EXTERN, NEAR, FAR, ALIGNAS, MUTABLE, FRIEND, EXPLICIT, INLINE, NORETURN);
170
171        /**
172         * Interface modifiers used in C++/CLI extensions that execute in .NET managed
173         * execution environment.
174         */
175        private static final EnumSet<ETokenType> MS_INTERFACE_MODIFIERS = EnumSet.of(PUBLIC, PRIVATE);
176
177        /**
178         * Interface types used in C++/CLI extensions that execute in .NET managed
179         * execution environment.
180         */
181        private static final EnumSet<ETokenType> MS_INTERFACE_TYPES = EnumSet.of(INTERFACE_CLASS, INTERFACE_STRUCT);
182
183        /**
184         * Set of common "keywords" that are not actually part of the language but
185         * rather used by certain compilers and implicitly defined using macros. The
186         * solution used here is to filter them out.
187         */
188        private static final Set<String> PSEUDO_KEYWORDS = new HashSet<>(Arrays.asList(
189                        // typically found in Windows compilers
190                        "__fastcall", "__export", "__forceinline", "_cdecl", "_stdcall", "__stdcall", "WINAPI", "APIENTRY",
191                        "CALLBACK",
192                        // used by the common Qt library
193                        "Q_EXPORT",
194                        // keywords found in ISA dialog manager
195                        "DML_c", "DM_CALLBACK", "__1", "__2", "__3", "__4", "__5", "__6", "__7", "DM_ENTRY", "DML_pascal",
196                        "DML_default",
197                        // project specific keywords
198                        "IGVPWORD_API"));
199
200        public CppShallowParser() {
201                createNamespaceRules();
202        }
203
204        @Override
205        protected void createMetaRules() {
206                new CppShallowParserMetaRules(this).contributeRules();
207                super.createMetaRules();
208        }
209
210        @Override
211        protected void createClassElementsRules() {
212                new CppShallowParserClassElementRules(this).contributeRules();
213        }
214
215        /** Creates namespace specific rules. */
216        private void createNamespaceRules() {
217                // namespace
218                RecognizerBase<EGenericParserStates> namespaceAlternative = inAnyState().sequence(NAMESPACE)
219                                .repeatedSubRecognizer(createAnnotationSubrecognizer()).markStart()
220                                .skipBefore(EnumSet.of(SEMICOLON, LBRACE));
221                namespaceAlternative.sequence(LBRACE)
222                                .createNode(EShallowEntityType.MODULE, SubTypeNames.NAMESPACE, new Region(0, -2)).parseUntil(TOP_LEVEL)
223                                .sequence(RBRACE).endNode();
224                namespaceAlternative.sequence(SEMICOLON)
225                                .createNode(EShallowEntityType.META, SubTypeNames.NAMESPACE, new Region(0, -2)).endNode();
226        }
227
228        @Override
229        protected void createTypeRules() {
230                createTypedefRules();
231                createEnumRules();
232
233                // types; we have to ensure when skipping to the LBRACE, that there is
234                // no earlier SEMICOLON or EQ, as in these cases it is a forward
235                // declaration or a variable.
236                for (ETokenType typeKeyword : getTypeKeywords()) {
237                        inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(typeKeyword)
238                                        .repeatedSubRecognizer(createAnnotationSubrecognizer()).markStart().sequence(getValidIdentifiers())
239                                        // prevent parsing functions that return a struct to be recognized as types
240                                        .sequenceBefore(EnumSet.complementOf(EnumSet.of(IDENTIFIER, MULT))) //
241                                        .skipBefore(EnumSet.of(SEMICOLON, LBRACE, EQ)).sequence(LBRACE)
242                                        .createNode(EShallowEntityType.TYPE, typeKeyword.name().toLowerCase(), 0).parseUntil(IN_TYPE)
243                                        .sequence(RBRACE).optional(SEMICOLON).endNode();
244                }
245
246                // Rule for recognizing interface classes/structs used in C++/CLI extensions
247                // that execute in .NET managed
248                // execution environment.
249                inState(TOP_LEVEL, IN_MODULE).optional(MS_INTERFACE_MODIFIERS).sequence(MS_INTERFACE_TYPES).markStart()
250                                .sequence(getValidIdentifiers()).skipBefore(LBRACE).sequence(LBRACE)
251                                .createNode(EShallowEntityType.TYPE, SubTypeNames.INTERFACE_CLASS_NAME, 0).parseUntil(IN_TYPE)
252                                .sequence(RBRACE).optional(SEMICOLON).endNode();
253
254                // anonymous types
255                inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(getTypeKeywords(), LBRACE)
256                                .createNode(EShallowEntityType.TYPE, 0, "<anonymous>").parseUntil(IN_TYPE).sequence(RBRACE)
257                                .optional(SEMICOLON).endNode();
258        }
259
260        /** Creates rules for parsing enums and enum classes. */
261        private void createEnumRules() {
262                // enum (both anonymous and named)
263                finishEnumDeclaration(inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(ENUM), SubTypeNames.ENUM, 1);
264                finishEnumDeclaration(inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(ENUM, CLASS), SubTypeNames.ENUM_CLASS, 2);
265
266                RecognizerBase<EGenericParserStates> enumLiteralAlternative = inState(IN_ENUM).sequence(IDENTIFIER)
267                                .optionalSubRecognizer(createAnnotationSubrecognizer());
268                finishEnumLiteral(enumLiteralAlternative.sequenceBefore(EnumSet.of(COMMA, RBRACE)));
269                finishEnumLiteral(enumLiteralAlternative.sequence(EQ).skipBefore(EnumSet.of(COMMA, RBRACE)));
270
271                RecognizerBase<EGenericParserStates> typedEnumLiteralAlternative = typePatternInState(IN_ENUM)
272                                .sequence(IDENTIFIER).optionalSubRecognizer(createAnnotationSubrecognizer());
273                finishEnumLiteral(typedEnumLiteralAlternative.sequenceBefore(EnumSet.of(COMMA, RBRACE)));
274        }
275
276        /** Returns a subrecognizer for C++ annotations/attributes. */
277        protected RecognizerBase<EGenericParserStates> createAnnotationSubrecognizer() {
278                // remember the start of the recognizer chain (we can not used the
279                // result of the method chain, as this would be the last recognizer)
280                return createRecognizer(start -> start.sequence(LBRACK, LBRACK).skipTo(RBRACK, RBRACK));
281        }
282
283        /** Finishes the rule for enum or enum class declarations. */
284        private static void finishEnumDeclaration(RecognizerBase<EGenericParserStates> enumAlternative, String subType,
285                        Object name) {
286                enumAlternative.sequence(LBRACE).createNode(EShallowEntityType.TYPE, subType).parseUntil(IN_ENUM)
287                                .sequence(RBRACE).optional(SEMICOLON).endNode();
288                enumAlternative.sequence(IDENTIFIER).sequenceBefore(EnumSet.of(LBRACE, COLON)).skipTo(LBRACE)
289                                .createNode(EShallowEntityType.TYPE, subType, name).parseUntil(IN_ENUM).sequence(RBRACE)
290                                .optional(SEMICOLON).endNode();
291        }
292
293        /** Finishes the rule for a enum literal. */
294        private static void finishEnumLiteral(RecognizerBase<EGenericParserStates> enumLiteralAlternative) {
295                enumLiteralAlternative.createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ENUM_LITERAL, 0).endNode();
296        }
297
298        @Override
299        protected EnumSet<ETokenType> getTypeKeywords() {
300                return TYPE_KEYWORDS;
301        }
302
303        /** Returns all primitive token types. */
304        protected EnumSet<ETokenType> getPrimitiveTypes() {
305                return PRIMITIVE_TYPES;
306        }
307
308        /** Returns all primitive token types. */
309        protected EnumSet<ETokenType> getTypeOrIdentifier() {
310                return TYPE_OR_IDENTIFIER;
311        }
312
313        /** Returns all method and type declaration modifiers. */
314        public EnumSet<ETokenType> getMethodAndTypeDeclarationModifiers() {
315                return METHOD_AND_TYPE_DECLARATION_MODIFIERS;
316        }
317
318        /**
319         * Creates a new recognizer that can match a scope prefix for a method-like
320         * construct. This includes sequences of identifiers with double colon, possibly
321         * intermixed with template arguments.
322         */
323        protected RecognizerBase<EGenericParserStates> createScopeRecognizer() {
324                // remember the start of the recognizer chain (we can not used the
325                // result of the method chain, as this would be the last recognizer)
326                return createRecognizer(start -> start.sequence(IDENTIFIER)
327                                .optionalSubRecognizer(new CppSkipTemplateSpecificationRecognizer()).sequence(SCOPE));
328        }
329
330        @Override
331        protected void createCaseRule() {
332                super.createCaseRule();
333
334                // C/C++ also allows parentheses here and type casts
335                inState(IN_METHOD).markStart().sequence(CASE).skipTo(COLON)
336                                .createNode(EShallowEntityType.META, 0, new Region(1, -2)).endNode();
337        }
338
339        /** Rules for detecting expanded macros in C/C++ code. */
340        protected void contributeExpandedMacroRule() {
341                EnumSet<ETokenType> separators = EnumSet.of(LBRACE, RBRACE);
342                separators.addAll(ETokenType.KEYWORDS);
343
344                inState(IN_METHOD).sequence(new UppercaseIdentifierMatcher()).skipNested(LPAREN, RPAREN)
345                                .optional(PREPROCESSOR_DIRECTIVE).sequenceBefore(separators)
346                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.EXPANDED_MACRO, 0).endNode();
347        }
348
349        @Override
350        protected void createSimpleStatementRule() {
351                contributeExpandedMacroRule();
352
353                createInLocalVariableTypeDeclarations();
354
355                EnumSet<ETokenType> validIdentifiers = getValidIdentifiers();
356                // need to add below operators to the list of valid identifiers because their
357                // alternative (string) representations can be used as variable/type names in C.
358                // Same applies to statement rules below.
359                validIdentifiers.addAll(OPERATORS_WITH_ALTERNATIVE_REPRESENTATION);
360
361                // statements
362                EnumSet<ETokenType> statementStartTokens = getStatementStartTokens();
363                statementStartTokens.addAll(OPERATORS_WITH_ALTERNATIVE_REPRESENTATION);
364                contributeSimpleStatementRules(validIdentifiers, statementStartTokens);
365        }
366
367        /**
368         * Creates rules for type declarations within local variable declarations, where
369         * a union or struct is not given a name but rather directly followed by
370         * variables.
371         */
372        private void createInLocalVariableTypeDeclarations() {
373                inState(IN_METHOD, TOP_LEVEL).sequence(EnumSet.of(STRUCT, UNION), LBRACE)
374                                .skipToWithNesting(RBRACE, LBRACE, RBRACE).markStart().sequence(IDENTIFIER)
375                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.LOCAL_VARIABLE, 0).skipTo(SEMICOLON).endNode();
376        }
377
378        @Override
379        protected EnumSet<ETokenType> getSimpleBlockKeywordsWithParentheses() {
380                return EnumSet.of(WHILE, FOR, SWITCH);
381        }
382
383        @Override
384        protected EnumSet<ETokenType> getSimpleBlockKeywordsWithoutParentheses() {
385                return EnumSet.of(ELSE);
386        }
387
388        @Override
389        protected EnumSet<ETokenType> getStatementStartTokens() {
390                return EnumSet.of(AUTO, NEW, DELETE, BREAK, CONTINUE, RETURN, ASSERT, FINAL, GOTO, SUPER, THIS, THROW, MULT,
391                                LPAREN, PLUSPLUS, MINUSMINUS, SCOPE, IDENTIFIER, OPERATOR);
392        }
393
394        @Override
395        protected RecognizerBase<EGenericParserStates> typePattern(RecognizerBase<EGenericParserStates> currentState) {
396
397                EnumSet<ETokenType> extendedTypeKeywords = EnumSet.copyOf(getTypeKeywords());
398                extendedTypeKeywords.add(TYPENAME);
399
400                RecognizerBase<EGenericParserStates> primitiveOrIdentifierMatcher = createRecognizer(start -> {
401                        // alternative 1: basic type
402                        start.sequence(BASE_TYPE_OR_IDENTIFIER);
403
404                        // alternative 2: primitive type consisting of "primitive modifiers" followed by
405                        // "proper" primitive (i.e. unsigned long int); note that this would also match
406                        // invalid ones, such as "long short float"
407                        start.optional(REGISTER).repeated(PRIMITIVE_MODIFIERS).sequence(PRIMITIVE_TYPES);
408
409                        // alternative 3: primitive type consisting of only "primitive modifiers" (i.e.
410                        // unsigned short); note that this would also match invalid ones, such as "long
411                        // long long". This one must be last, to ensure we do not end here in case of
412                        // "signed int".
413                        start.optional(REGISTER).repeatedAtLeastOnce(PRIMITIVE_MODIFIERS);
414                });
415
416                // The XOR clause is added to support the handle declarator (^) from
417                // Microsoft's C++/CLI, e.g. "FooClass^"
418                return currentState.repeated(getMethodAndTypeDeclarationModifiers()).skipNested(LPAREN, RPAREN)
419                                .optional(extendedTypeKeywords).skipNested(LPAREN, RPAREN)
420                                .repeatedSubRecognizer(createScopeRecognizer()).subRecognizer(primitiveOrIdentifierMatcher)
421                                .subRecognizer(new CppSkipTemplateSpecificationRecognizer(), 0, 1)
422                                .skipAny(EnumSet.of(MULT, AND, ANDAND, CONST, CONSTEXPR, XOR)).skipNested(LBRACK, RBRACK)
423                                .skipAny(EnumSet.of(MULT, AND, ANDAND, CONST, CONSTEXPR, XOR, LBRACK, RBRACK, NEAR, FAR));
424        }
425
426        @Override
427        protected void createSubExpressionRules() {
428                RecognizerBase<EGenericParserStates> lambdaAlternative = inState(IN_EXPRESSION).skipNested(LBRACK, RBRACK)
429                                .skipNested(LPAREN, RPAREN).repeated(LAMBDA_MODIFIERS).optional(EnumSet.of(NOEXCEPT, THROW))
430                                .skipNested(LPAREN, RPAREN);
431
432                finishLambdaRule(lambdaAlternative.sequence(POINTERTO).repeated(getTypeKeywords())
433                                .optional(EnumSet.of(SIGNED, UNSIGNED)).optional(getTypeOrIdentifier()).skipNested(LPAREN, RPAREN));
434                finishLambdaRule(lambdaAlternative);
435
436        }
437
438        /** Finishes a rule that indicates a lambda. */
439        private static void finishLambdaRule(RecognizerBase<EGenericParserStates> recognizer) {
440                recognizer.sequence(LBRACE).createNode(EShallowEntityType.METHOD, SubTypeNames.LAMBDA_EXPRESSION)
441                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
442        }
443
444        @Override
445        protected RecognizerBase<EGenericParserStates> getSubExpressionRecognizer() {
446                return new CppLambdaRecognizer(getTypeOrIdentifier());
447        }
448
449        @Override
450        protected boolean isFilteredToken(IToken token, IToken previousToken) {
451                if (token.getType() == IDENTIFIER && PSEUDO_KEYWORDS.contains(token.getText())) {
452                        return true;
453                }
454
455                return super.isFilteredToken(token, previousToken);
456        }
457
458        @Override
459        protected List<IToken> filterTokens(List<IToken> tokens) {
460                List<IToken> result = super.filterTokens(tokens);
461                result = filterGCCAttributes(result);
462                result = filterMicrosoftSpecificAttribute(result);
463                return result;
464        }
465
466        /**
467         * __declspec is a Microsoft-specific C/C++ token that we have to ignore.
468         * https://docs.microsoft.com/en-us/previous-versions/dabb5z75(v=vs.140)
469         */
470        private static List<IToken> filterMicrosoftSpecificAttribute(List<IToken> tokens) {
471                return filterParserSpecificAttribute(tokens, "__declspec");
472        }
473
474        /**
475         * Filters GCC attributes. See e.g.
476         * http://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html
477         */
478        private static List<IToken> filterGCCAttributes(List<IToken> tokens) {
479                return filterParserSpecificAttribute(tokens, "__attribute__");
480        }
481
482        /**
483         * Filters tokens belonging to the attribute with the given keyword. E.g.,
484         * microsoft parsers allow "__declspec(...)".
485         */
486        private static List<IToken> filterParserSpecificAttribute(List<IToken> tokens, String attributeKeyword) {
487                List<IToken> result = new ArrayList<>();
488                boolean inAttribute = false;
489                int openBraces = 0;
490                for (int i = 0; i < tokens.size(); i++) {
491                        IToken token = tokens.get(i);
492                        if (token.getText().equals(attributeKeyword)) {
493                                inAttribute = true;
494                        } else if (inAttribute && token.getType() == LPAREN) {
495                                openBraces++;
496                        } else if (inAttribute && token.getType() == RPAREN) {
497                                openBraces--;
498                                if (openBraces == 0) {
499                                        inAttribute = false;
500                                }
501                        } else if (!inAttribute) {
502                                result.add(token);
503                        }
504                }
505                return result;
506        }
507}