001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 the ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package eu.cqse.check.framework.shallowparser.languages.cpp; 018 019import static eu.cqse.check.framework.scanner.ETokenType.ALIGNAS; 020import static eu.cqse.check.framework.scanner.ETokenType.AND; 021import static eu.cqse.check.framework.scanner.ETokenType.ANDAND; 022import static eu.cqse.check.framework.scanner.ETokenType.ASSERT; 023import static eu.cqse.check.framework.scanner.ETokenType.AUTO; 024import static eu.cqse.check.framework.scanner.ETokenType.BOOL; 025import static eu.cqse.check.framework.scanner.ETokenType.BREAK; 026import static eu.cqse.check.framework.scanner.ETokenType.BYTE; 027import static eu.cqse.check.framework.scanner.ETokenType.CASE; 028import static eu.cqse.check.framework.scanner.ETokenType.CHAR; 029import static eu.cqse.check.framework.scanner.ETokenType.CLASS; 030import static eu.cqse.check.framework.scanner.ETokenType.COLON; 031import static eu.cqse.check.framework.scanner.ETokenType.COMMA; 032import static eu.cqse.check.framework.scanner.ETokenType.CONST; 033import static eu.cqse.check.framework.scanner.ETokenType.CONSTEXPR; 034import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE; 035import static eu.cqse.check.framework.scanner.ETokenType.DECLTYPE; 036import static eu.cqse.check.framework.scanner.ETokenType.DELETE; 037import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE; 038import static eu.cqse.check.framework.scanner.ETokenType.ELSE; 039import static eu.cqse.check.framework.scanner.ETokenType.ENUM; 040import static eu.cqse.check.framework.scanner.ETokenType.EQ; 041import static eu.cqse.check.framework.scanner.ETokenType.EXPLICIT; 042import static eu.cqse.check.framework.scanner.ETokenType.EXTERN; 043import static eu.cqse.check.framework.scanner.ETokenType.FAR; 044import static eu.cqse.check.framework.scanner.ETokenType.FINAL; 045import static eu.cqse.check.framework.scanner.ETokenType.FLOAT; 046import static eu.cqse.check.framework.scanner.ETokenType.FOR; 047import static eu.cqse.check.framework.scanner.ETokenType.FRIEND; 048import static eu.cqse.check.framework.scanner.ETokenType.GOTO; 049import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER; 050import static eu.cqse.check.framework.scanner.ETokenType.INLINE; 051import static eu.cqse.check.framework.scanner.ETokenType.INT; 052import static eu.cqse.check.framework.scanner.ETokenType.INTERFACE_CLASS; 053import static eu.cqse.check.framework.scanner.ETokenType.INTERFACE_STRUCT; 054import static eu.cqse.check.framework.scanner.ETokenType.LBRACE; 055import static eu.cqse.check.framework.scanner.ETokenType.LBRACK; 056import static eu.cqse.check.framework.scanner.ETokenType.LONG; 057import static eu.cqse.check.framework.scanner.ETokenType.LPAREN; 058import static eu.cqse.check.framework.scanner.ETokenType.MINUSMINUS; 059import static eu.cqse.check.framework.scanner.ETokenType.MULT; 060import static eu.cqse.check.framework.scanner.ETokenType.MUTABLE; 061import static eu.cqse.check.framework.scanner.ETokenType.NAMESPACE; 062import static eu.cqse.check.framework.scanner.ETokenType.NEAR; 063import static eu.cqse.check.framework.scanner.ETokenType.NEW; 064import static eu.cqse.check.framework.scanner.ETokenType.NOEXCEPT; 065import static eu.cqse.check.framework.scanner.ETokenType.NORETURN; 066import static eu.cqse.check.framework.scanner.ETokenType.OPERATOR; 067import static eu.cqse.check.framework.scanner.ETokenType.PLUSPLUS; 068import static eu.cqse.check.framework.scanner.ETokenType.POINTERTO; 069import static eu.cqse.check.framework.scanner.ETokenType.PREPROCESSOR_DIRECTIVE; 070import static eu.cqse.check.framework.scanner.ETokenType.PRIVATE; 071import static eu.cqse.check.framework.scanner.ETokenType.PUBLIC; 072import static eu.cqse.check.framework.scanner.ETokenType.RBRACE; 073import static eu.cqse.check.framework.scanner.ETokenType.RBRACK; 074import static eu.cqse.check.framework.scanner.ETokenType.REGISTER; 075import static eu.cqse.check.framework.scanner.ETokenType.RETURN; 076import static eu.cqse.check.framework.scanner.ETokenType.RPAREN; 077import static eu.cqse.check.framework.scanner.ETokenType.SCOPE; 078import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON; 079import static eu.cqse.check.framework.scanner.ETokenType.SHORT; 080import static eu.cqse.check.framework.scanner.ETokenType.SIGNED; 081import static eu.cqse.check.framework.scanner.ETokenType.STATIC; 082import static eu.cqse.check.framework.scanner.ETokenType.STRUCT; 083import static eu.cqse.check.framework.scanner.ETokenType.SUPER; 084import static eu.cqse.check.framework.scanner.ETokenType.SWITCH; 085import static eu.cqse.check.framework.scanner.ETokenType.THIS; 086import static eu.cqse.check.framework.scanner.ETokenType.THROW; 087import static eu.cqse.check.framework.scanner.ETokenType.TYPENAME; 088import static eu.cqse.check.framework.scanner.ETokenType.UNION; 089import static eu.cqse.check.framework.scanner.ETokenType.UNSIGNED; 090import static eu.cqse.check.framework.scanner.ETokenType.VIRTUAL; 091import static eu.cqse.check.framework.scanner.ETokenType.VOID; 092import static eu.cqse.check.framework.scanner.ETokenType.WCHAR_T; 093import static eu.cqse.check.framework.scanner.ETokenType.WHILE; 094import static eu.cqse.check.framework.scanner.ETokenType.XOR; 095import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_ENUM; 096import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_EXPRESSION; 097import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_METHOD; 098import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_MODULE; 099import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_TYPE; 100import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.TOP_LEVEL; 101 102import java.util.ArrayList; 103import java.util.Arrays; 104import java.util.EnumSet; 105import java.util.HashSet; 106import java.util.List; 107import java.util.Set; 108 109import org.conqat.lib.commons.collections.CollectionUtils; 110import org.conqat.lib.commons.region.Region; 111 112import eu.cqse.check.framework.scanner.ETokenType; 113import eu.cqse.check.framework.scanner.IToken; 114import eu.cqse.check.framework.shallowparser.SubTypeNames; 115import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType; 116import eu.cqse.check.framework.shallowparser.framework.RecognizerBase; 117import eu.cqse.check.framework.shallowparser.languages.base.CStyleShallowParserBase; 118import eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates; 119 120/** 121 * Shallow parser for C/C++14. 122 * <p> 123 * What this parser does and does not: 124 * <ul> 125 * <li>The parser recognizes types (classes, enums, interfaces), methods and 126 * attributes, individual statements and lambdas.</li> 127 * <li>It recognizes the nesting of statements (e.g. in loops), but does not 128 * parse into the statements. For example, it recognizes an if-statement and 129 * provides the list of sub-statements, but does not provide direct access to 130 * the if-condition.</li> 131 * <li>All preprocessor statements are parsed as meta information.</li> 132 * <li>Template declarations are parsed as preceding meta information.</li> 133 * <li>Forward declarations are handled as meta information.</li> 134 * <li>We heuristically filter code generating macros, such as 135 * "CREATE_STUFF(MyClass)".</li> 136 * </ul> 137 */ 138public class CppShallowParser extends CStyleShallowParserBase { 139 140 /** Basic keywords used for primitive types in a C++ like language. */ 141 protected static final EnumSet<ETokenType> PRIMITIVE_TYPES = EnumSet.of(VOID, BYTE, INT, FLOAT, DOUBLE, CHAR, BOOL, 142 WCHAR_T); 143 144 /** 145 * Modifiers that can be used in front of the other primitive types. Note that 146 * in C you can write both "short int" and "short", so the "int" is assumed 147 * implicitly. Still, "short" is considered a modifier here, which is different 148 * from e.g. Java, where short would be a primitive on its own. 149 */ 150 private static final EnumSet<ETokenType> PRIMITIVE_MODIFIERS = EnumSet.of(LONG, SHORT, SIGNED, UNSIGNED); 151 152 /** Token types that are used for type declarations. */ 153 private static final EnumSet<ETokenType> TYPE_KEYWORDS = EnumSet.of(CLASS, STRUCT, UNION, ENUM); 154 155 /** Modifiers that can be specified after a lambda. */ 156 private static final EnumSet<ETokenType> LAMBDA_MODIFIERS = EnumSet.of(CONSTEXPR, MUTABLE); 157 158 /** Base token types for types or identifiers (without primitive types). */ 159 protected static final EnumSet<ETokenType> BASE_TYPE_OR_IDENTIFIER = EnumSet.of(AUTO, DECLTYPE, IDENTIFIER); 160 161 /** Token types for types or identifiers. */ 162 private static final EnumSet<ETokenType> TYPE_OR_IDENTIFIER = CollectionUtils.enumUnionSet(BASE_TYPE_OR_IDENTIFIER, 163 PRIMITIVE_TYPES); 164 165 /** 166 * Token types that are modifiers in front of a type declaration or method. 167 */ 168 protected static final EnumSet<ETokenType> METHOD_AND_TYPE_DECLARATION_MODIFIERS = EnumSet.of(CONST, CONSTEXPR, 169 STATIC, VIRTUAL, EXTERN, NEAR, FAR, ALIGNAS, MUTABLE, FRIEND, EXPLICIT, INLINE, NORETURN); 170 171 /** 172 * Interface modifiers used in C++/CLI extensions that execute in .NET managed 173 * execution environment. 174 */ 175 private static final EnumSet<ETokenType> MS_INTERFACE_MODIFIERS = EnumSet.of(PUBLIC, PRIVATE); 176 177 /** 178 * Interface types used in C++/CLI extensions that execute in .NET managed 179 * execution environment. 180 */ 181 private static final EnumSet<ETokenType> MS_INTERFACE_TYPES = EnumSet.of(INTERFACE_CLASS, INTERFACE_STRUCT); 182 183 /** 184 * Set of common "keywords" that are not actually part of the language but 185 * rather used by certain compilers and implicitly defined using macros. The 186 * solution used here is to filter them out. 187 */ 188 private static final Set<String> PSEUDO_KEYWORDS = new HashSet<>(Arrays.asList( 189 // typically found in Windows compilers 190 "__fastcall", "__export", "__forceinline", "_cdecl", "_stdcall", "__stdcall", "WINAPI", "APIENTRY", 191 "CALLBACK", 192 // used by the common Qt library 193 "Q_EXPORT", 194 // keywords found in ISA dialog manager 195 "DML_c", "DM_CALLBACK", "__1", "__2", "__3", "__4", "__5", "__6", "__7", "DM_ENTRY", "DML_pascal", 196 "DML_default", 197 // project specific keywords 198 "IGVPWORD_API")); 199 200 public CppShallowParser() { 201 createNamespaceRules(); 202 } 203 204 @Override 205 protected void createMetaRules() { 206 new CppShallowParserMetaRules(this).contributeRules(); 207 super.createMetaRules(); 208 } 209 210 @Override 211 protected void createClassElementsRules() { 212 new CppShallowParserClassElementRules(this).contributeRules(); 213 } 214 215 /** Creates namespace specific rules. */ 216 private void createNamespaceRules() { 217 // namespace 218 RecognizerBase<EGenericParserStates> namespaceAlternative = inAnyState().sequence(NAMESPACE) 219 .repeatedSubRecognizer(createAnnotationSubrecognizer()).markStart() 220 .skipBefore(EnumSet.of(SEMICOLON, LBRACE)); 221 namespaceAlternative.sequence(LBRACE) 222 .createNode(EShallowEntityType.MODULE, SubTypeNames.NAMESPACE, new Region(0, -2)).parseUntil(TOP_LEVEL) 223 .sequence(RBRACE).endNode(); 224 namespaceAlternative.sequence(SEMICOLON) 225 .createNode(EShallowEntityType.META, SubTypeNames.NAMESPACE, new Region(0, -2)).endNode(); 226 } 227 228 @Override 229 protected void createTypeRules() { 230 createTypedefRules(); 231 createEnumRules(); 232 233 // types; we have to ensure when skipping to the LBRACE, that there is 234 // no earlier SEMICOLON or EQ, as in these cases it is a forward 235 // declaration or a variable. 236 for (ETokenType typeKeyword : getTypeKeywords()) { 237 inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(typeKeyword) 238 .repeatedSubRecognizer(createAnnotationSubrecognizer()).markStart().sequence(getValidIdentifiers()) 239 // prevent parsing functions that return a struct to be recognized as types 240 .sequenceBefore(EnumSet.complementOf(EnumSet.of(IDENTIFIER, MULT))) // 241 .skipBefore(EnumSet.of(SEMICOLON, LBRACE, EQ)).sequence(LBRACE) 242 .createNode(EShallowEntityType.TYPE, typeKeyword.name().toLowerCase(), 0).parseUntil(IN_TYPE) 243 .sequence(RBRACE).optional(SEMICOLON).endNode(); 244 } 245 246 // Rule for recognizing interface classes/structs used in C++/CLI extensions 247 // that execute in .NET managed 248 // execution environment. 249 inState(TOP_LEVEL, IN_MODULE).optional(MS_INTERFACE_MODIFIERS).sequence(MS_INTERFACE_TYPES).markStart() 250 .sequence(getValidIdentifiers()).skipBefore(LBRACE).sequence(LBRACE) 251 .createNode(EShallowEntityType.TYPE, SubTypeNames.INTERFACE_CLASS_NAME, 0).parseUntil(IN_TYPE) 252 .sequence(RBRACE).optional(SEMICOLON).endNode(); 253 254 // anonymous types 255 inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(getTypeKeywords(), LBRACE) 256 .createNode(EShallowEntityType.TYPE, 0, "<anonymous>").parseUntil(IN_TYPE).sequence(RBRACE) 257 .optional(SEMICOLON).endNode(); 258 } 259 260 /** Creates rules for parsing enums and enum classes. */ 261 private void createEnumRules() { 262 // enum (both anonymous and named) 263 finishEnumDeclaration(inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(ENUM), SubTypeNames.ENUM, 1); 264 finishEnumDeclaration(inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(ENUM, CLASS), SubTypeNames.ENUM_CLASS, 2); 265 266 RecognizerBase<EGenericParserStates> enumLiteralAlternative = inState(IN_ENUM).sequence(IDENTIFIER) 267 .optionalSubRecognizer(createAnnotationSubrecognizer()); 268 finishEnumLiteral(enumLiteralAlternative.sequenceBefore(EnumSet.of(COMMA, RBRACE))); 269 finishEnumLiteral(enumLiteralAlternative.sequence(EQ).skipBefore(EnumSet.of(COMMA, RBRACE))); 270 271 RecognizerBase<EGenericParserStates> typedEnumLiteralAlternative = typePatternInState(IN_ENUM) 272 .sequence(IDENTIFIER).optionalSubRecognizer(createAnnotationSubrecognizer()); 273 finishEnumLiteral(typedEnumLiteralAlternative.sequenceBefore(EnumSet.of(COMMA, RBRACE))); 274 } 275 276 /** Returns a subrecognizer for C++ annotations/attributes. */ 277 protected RecognizerBase<EGenericParserStates> createAnnotationSubrecognizer() { 278 // remember the start of the recognizer chain (we can not used the 279 // result of the method chain, as this would be the last recognizer) 280 return createRecognizer(start -> start.sequence(LBRACK, LBRACK).skipTo(RBRACK, RBRACK)); 281 } 282 283 /** Finishes the rule for enum or enum class declarations. */ 284 private static void finishEnumDeclaration(RecognizerBase<EGenericParserStates> enumAlternative, String subType, 285 Object name) { 286 enumAlternative.sequence(LBRACE).createNode(EShallowEntityType.TYPE, subType).parseUntil(IN_ENUM) 287 .sequence(RBRACE).optional(SEMICOLON).endNode(); 288 enumAlternative.sequence(IDENTIFIER).sequenceBefore(EnumSet.of(LBRACE, COLON)).skipTo(LBRACE) 289 .createNode(EShallowEntityType.TYPE, subType, name).parseUntil(IN_ENUM).sequence(RBRACE) 290 .optional(SEMICOLON).endNode(); 291 } 292 293 /** Finishes the rule for a enum literal. */ 294 private static void finishEnumLiteral(RecognizerBase<EGenericParserStates> enumLiteralAlternative) { 295 enumLiteralAlternative.createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ENUM_LITERAL, 0).endNode(); 296 } 297 298 @Override 299 protected EnumSet<ETokenType> getTypeKeywords() { 300 return TYPE_KEYWORDS; 301 } 302 303 /** Returns all primitive token types. */ 304 protected EnumSet<ETokenType> getPrimitiveTypes() { 305 return PRIMITIVE_TYPES; 306 } 307 308 /** Returns all primitive token types. */ 309 protected EnumSet<ETokenType> getTypeOrIdentifier() { 310 return TYPE_OR_IDENTIFIER; 311 } 312 313 /** Returns all method and type declaration modifiers. */ 314 public EnumSet<ETokenType> getMethodAndTypeDeclarationModifiers() { 315 return METHOD_AND_TYPE_DECLARATION_MODIFIERS; 316 } 317 318 /** 319 * Creates a new recognizer that can match a scope prefix for a method-like 320 * construct. This includes sequences of identifiers with double colon, possibly 321 * intermixed with template arguments. 322 */ 323 protected RecognizerBase<EGenericParserStates> createScopeRecognizer() { 324 // remember the start of the recognizer chain (we can not used the 325 // result of the method chain, as this would be the last recognizer) 326 return createRecognizer(start -> start.sequence(IDENTIFIER) 327 .optionalSubRecognizer(new CppSkipTemplateSpecificationRecognizer()).sequence(SCOPE)); 328 } 329 330 @Override 331 protected void createCaseRule() { 332 super.createCaseRule(); 333 334 // C/C++ also allows parentheses here and type casts 335 inState(IN_METHOD).markStart().sequence(CASE).skipTo(COLON) 336 .createNode(EShallowEntityType.META, 0, new Region(1, -2)).endNode(); 337 } 338 339 /** Rules for detecting expanded macros in C/C++ code. */ 340 protected void contributeExpandedMacroRule() { 341 EnumSet<ETokenType> separators = EnumSet.of(LBRACE, RBRACE); 342 separators.addAll(ETokenType.KEYWORDS); 343 344 inState(IN_METHOD).sequence(new UppercaseIdentifierMatcher()).skipNested(LPAREN, RPAREN) 345 .optional(PREPROCESSOR_DIRECTIVE).sequenceBefore(separators) 346 .createNode(EShallowEntityType.STATEMENT, SubTypeNames.EXPANDED_MACRO, 0).endNode(); 347 } 348 349 @Override 350 protected void createSimpleStatementRule() { 351 contributeExpandedMacroRule(); 352 353 createInLocalVariableTypeDeclarations(); 354 355 EnumSet<ETokenType> validIdentifiers = getValidIdentifiers(); 356 // need to add below operators to the list of valid identifiers because their 357 // alternative (string) representations can be used as variable/type names in C. 358 // Same applies to statement rules below. 359 validIdentifiers.addAll(OPERATORS_WITH_ALTERNATIVE_REPRESENTATION); 360 361 // statements 362 EnumSet<ETokenType> statementStartTokens = getStatementStartTokens(); 363 statementStartTokens.addAll(OPERATORS_WITH_ALTERNATIVE_REPRESENTATION); 364 contributeSimpleStatementRules(validIdentifiers, statementStartTokens); 365 } 366 367 /** 368 * Creates rules for type declarations within local variable declarations, where 369 * a union or struct is not given a name but rather directly followed by 370 * variables. 371 */ 372 private void createInLocalVariableTypeDeclarations() { 373 inState(IN_METHOD, TOP_LEVEL).sequence(EnumSet.of(STRUCT, UNION), LBRACE) 374 .skipToWithNesting(RBRACE, LBRACE, RBRACE).markStart().sequence(IDENTIFIER) 375 .createNode(EShallowEntityType.STATEMENT, SubTypeNames.LOCAL_VARIABLE, 0).skipTo(SEMICOLON).endNode(); 376 } 377 378 @Override 379 protected EnumSet<ETokenType> getSimpleBlockKeywordsWithParentheses() { 380 return EnumSet.of(WHILE, FOR, SWITCH); 381 } 382 383 @Override 384 protected EnumSet<ETokenType> getSimpleBlockKeywordsWithoutParentheses() { 385 return EnumSet.of(ELSE); 386 } 387 388 @Override 389 protected EnumSet<ETokenType> getStatementStartTokens() { 390 return EnumSet.of(AUTO, NEW, DELETE, BREAK, CONTINUE, RETURN, ASSERT, FINAL, GOTO, SUPER, THIS, THROW, MULT, 391 LPAREN, PLUSPLUS, MINUSMINUS, SCOPE, IDENTIFIER, OPERATOR); 392 } 393 394 @Override 395 protected RecognizerBase<EGenericParserStates> typePattern(RecognizerBase<EGenericParserStates> currentState) { 396 397 EnumSet<ETokenType> extendedTypeKeywords = EnumSet.copyOf(getTypeKeywords()); 398 extendedTypeKeywords.add(TYPENAME); 399 400 RecognizerBase<EGenericParserStates> primitiveOrIdentifierMatcher = createRecognizer(start -> { 401 // alternative 1: basic type 402 start.sequence(BASE_TYPE_OR_IDENTIFIER); 403 404 // alternative 2: primitive type consisting of "primitive modifiers" followed by 405 // "proper" primitive (i.e. unsigned long int); note that this would also match 406 // invalid ones, such as "long short float" 407 start.optional(REGISTER).repeated(PRIMITIVE_MODIFIERS).sequence(PRIMITIVE_TYPES); 408 409 // alternative 3: primitive type consisting of only "primitive modifiers" (i.e. 410 // unsigned short); note that this would also match invalid ones, such as "long 411 // long long". This one must be last, to ensure we do not end here in case of 412 // "signed int". 413 start.optional(REGISTER).repeatedAtLeastOnce(PRIMITIVE_MODIFIERS); 414 }); 415 416 // The XOR clause is added to support the handle declarator (^) from 417 // Microsoft's C++/CLI, e.g. "FooClass^" 418 return currentState.repeated(getMethodAndTypeDeclarationModifiers()).skipNested(LPAREN, RPAREN) 419 .optional(extendedTypeKeywords).skipNested(LPAREN, RPAREN) 420 .repeatedSubRecognizer(createScopeRecognizer()).subRecognizer(primitiveOrIdentifierMatcher) 421 .subRecognizer(new CppSkipTemplateSpecificationRecognizer(), 0, 1) 422 .skipAny(EnumSet.of(MULT, AND, ANDAND, CONST, CONSTEXPR, XOR)).skipNested(LBRACK, RBRACK) 423 .skipAny(EnumSet.of(MULT, AND, ANDAND, CONST, CONSTEXPR, XOR, LBRACK, RBRACK, NEAR, FAR)); 424 } 425 426 @Override 427 protected void createSubExpressionRules() { 428 RecognizerBase<EGenericParserStates> lambdaAlternative = inState(IN_EXPRESSION).skipNested(LBRACK, RBRACK) 429 .skipNested(LPAREN, RPAREN).repeated(LAMBDA_MODIFIERS).optional(EnumSet.of(NOEXCEPT, THROW)) 430 .skipNested(LPAREN, RPAREN); 431 432 finishLambdaRule(lambdaAlternative.sequence(POINTERTO).repeated(getTypeKeywords()) 433 .optional(EnumSet.of(SIGNED, UNSIGNED)).optional(getTypeOrIdentifier()).skipNested(LPAREN, RPAREN)); 434 finishLambdaRule(lambdaAlternative); 435 436 } 437 438 /** Finishes a rule that indicates a lambda. */ 439 private static void finishLambdaRule(RecognizerBase<EGenericParserStates> recognizer) { 440 recognizer.sequence(LBRACE).createNode(EShallowEntityType.METHOD, SubTypeNames.LAMBDA_EXPRESSION) 441 .parseUntil(IN_METHOD).sequence(RBRACE).endNode(); 442 } 443 444 @Override 445 protected RecognizerBase<EGenericParserStates> getSubExpressionRecognizer() { 446 return new CppLambdaRecognizer(getTypeOrIdentifier()); 447 } 448 449 @Override 450 protected boolean isFilteredToken(IToken token, IToken previousToken) { 451 if (token.getType() == IDENTIFIER && PSEUDO_KEYWORDS.contains(token.getText())) { 452 return true; 453 } 454 455 return super.isFilteredToken(token, previousToken); 456 } 457 458 @Override 459 protected List<IToken> filterTokens(List<IToken> tokens) { 460 List<IToken> result = super.filterTokens(tokens); 461 result = filterGCCAttributes(result); 462 result = filterMicrosoftSpecificAttribute(result); 463 return result; 464 } 465 466 /** 467 * __declspec is a Microsoft-specific C/C++ token that we have to ignore. 468 * https://docs.microsoft.com/en-us/previous-versions/dabb5z75(v=vs.140) 469 */ 470 private static List<IToken> filterMicrosoftSpecificAttribute(List<IToken> tokens) { 471 return filterParserSpecificAttribute(tokens, "__declspec"); 472 } 473 474 /** 475 * Filters GCC attributes. See e.g. 476 * http://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html 477 */ 478 private static List<IToken> filterGCCAttributes(List<IToken> tokens) { 479 return filterParserSpecificAttribute(tokens, "__attribute__"); 480 } 481 482 /** 483 * Filters tokens belonging to the attribute with the given keyword. E.g., 484 * microsoft parsers allow "__declspec(...)". 485 */ 486 private static List<IToken> filterParserSpecificAttribute(List<IToken> tokens, String attributeKeyword) { 487 List<IToken> result = new ArrayList<>(); 488 boolean inAttribute = false; 489 int openBraces = 0; 490 for (int i = 0; i < tokens.size(); i++) { 491 IToken token = tokens.get(i); 492 if (token.getText().equals(attributeKeyword)) { 493 inAttribute = true; 494 } else if (inAttribute && token.getType() == LPAREN) { 495 openBraces++; 496 } else if (inAttribute && token.getType() == RPAREN) { 497 openBraces--; 498 if (openBraces == 0) { 499 inAttribute = false; 500 } 501 } else if (!inAttribute) { 502 result.add(token); 503 } 504 } 505 return result; 506 } 507}