/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package eu.cqse.check.framework.shallowparser.languages.cs;

import static eu.cqse.check.framework.scanner.ETokenType.*;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_EXPRESSION;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_METHOD;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_MODULE;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_TYPE;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.TOP_LEVEL;

import java.util.EnumSet;

import org.conqat.lib.commons.region.Region;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
import eu.cqse.check.framework.shallowparser.framework.PropertyAccessNameResolver;
import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
import eu.cqse.check.framework.shallowparser.languages.base.CStyleShallowParserBase;
import eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates;

/**
 * Shallow parser for C#.
 * <p>
 * What this parser does and does not do:
 * <ul>
 * <li>The parser recognizes types (classes, enums, interfaces), methods and
 * attributes, and individual statements.</li>
 * <li>It recognizes the nesting of statements (e.g. in loops), but does not
 * parse into the statements. For example, it recognizes an if-statement and
 * provides the list of sub-statements, but does not provide direct access to
 * the if-condition.</li>
 * <li>Using statements and annotations are parsed as meta information.</li>
 * </ul>
 */
public class CsShallowParser extends CStyleShallowParserBase {

    /**
     * A set of all token types that can be used as valid identifiers. See
     * http://msdn.microsoft.com/en-us/library/x53a06bb.aspx for the full list.
     * <p>
     * The set itself is mutable; treat it as read-only and copy it before
     * modifying (see {@link #getValidIdentifiers()}).
     */
    public static final EnumSet<ETokenType> VALID_IDENTIFIERS = EnumSet.of(IDENTIFIER, ADD, ALIAS, ASCENDING, ASYNC,
            AWAIT, DESCENDING, DYNAMIC, FROM, GET, GLOBAL, GROUP, INTO, JOIN, LET, ORDERBY, PARTIAL, REMOVE, SELECT,
            SET, VALUE, VAR, WHERE, YIELD);

    /** All primitive types. */
    private static final EnumSet<ETokenType> PRIMITIVE_TYPES = EnumSet.of(VOID, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE,
            CHAR, BOOL, STRING, OBJECT, DECIMAL, SBYTE, USHORT, UINT, ULONG);

    /** {@inheritDoc} */
    @Override
    protected void createMetaRules() {
        // using (optionally "using static"), up to the terminating semicolon
        inState(TOP_LEVEL, IN_MODULE).markStart().sequence(USING).optional(STATIC).markStart().skipTo(SEMICOLON)
                .createNode(EShallowEntityType.META, SubTypeNames.USING, new Region(0, -2)).endNode();

        // annotations: everything between matching square brackets
        inState(IN_TYPE, IN_MODULE, TOP_LEVEL).sequence(LBRACK).createNode(EShallowEntityType.META, "annotation", 1)
                .skipToWithNesting(RBRACK, LBRACK, RBRACK).endNode();

        // preprocessor stuff
        inAnyState().sequence(PREPROCESSOR_DIRECTIVE).createNode(EShallowEntityType.META, 0).endNode();

        // the C# specific rules above are registered before the inherited ones
        super.createMetaRules();
    }

    /** {@inheritDoc} */
    @Override
    protected void createTypeRules() {
        // namespace
        inState(TOP_LEVEL, IN_MODULE).sequence(NAMESPACE, getValidIdentifiers()).skipTo(LBRACE)
                .createNode(EShallowEntityType.MODULE, 0, new Region(1, -2)).parseUntil(IN_MODULE).sequence(RBRACE)
                .endNode();

        super.createTypeRules();
    }

    /** {@inheritDoc} */
    @Override
    protected EnumSet<ETokenType> getTypeKeywords() {
        return EnumSet.of(CLASS, INTERFACE, ENUM, STRUCT);
    }

    /** {@inheritDoc} */
    @Override
    protected EnumSet<ETokenType> getTypeModifier() {
        return EnumSet.of(PUBLIC, PRIVATE, ABSTRACT, SEALED, INTERNAL, PARTIAL, STATIC);
    }

    /** Returns both type and type member modifiers. */
    private EnumSet<ETokenType> getTypeAndMemberModifiers() {
        // getTypeModifier() builds a new set on each call, so extending it in
        // place does not affect other callers
        EnumSet<ETokenType> allModifiers = getTypeModifier();
        allModifiers.addAll(
                EnumSet.of(PROTECTED, VIRTUAL, ASYNC, CONST, EVENT, EXTERN, OVERRIDE, READONLY, UNSAFE, VOLATILE, NEW));
        return allModifiers;
    }

    /** Returns modifiers that are applicable to events. */
    private static EnumSet<ETokenType> getEventModifiers() {
        return EnumSet.of(PRIVATE, PROTECTED, PUBLIC, INTERNAL, STATIC, VIRTUAL, SEALED, ABSTRACT);
    }

    /** {@inheritDoc} */
    @Override
    protected void createClassElementsRules() {
        // simple enum literals
        inState(IN_TYPE).sequence(IDENTIFIER).sequenceBefore(EnumSet.of(COMMA, EQ, RBRACE))
                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ENUM_LITERAL, 0)
                .skipBefore(EnumSet.of(COMMA, RBRACE)).optional(COMMA).endNode();

        // delegates
        typePattern(inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(DELEGATE)).sequence(getValidIdentifiers(), LPAREN)
                .createNode(EShallowEntityType.METHOD, 0, -2).skipTo(RPAREN).skipTo(SEMICOLON).endNode();

        createMethodRules();

        // event rules should be in front of properties, since otherwise properties
        // rules will catch events as well
        createEventsRules();
        createPropertiesRules();

        // attributes, e.g., fields (must be after method, as this would also
        // match methods)
        typePatternInState(IN_TYPE).sequence(getValidIdentifiers())
                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ATTRIBUTE, -1)
                .skipToWithNesting(SEMICOLON, LBRACE, RBRACE, getSubExpressionRecognizer()).endNode();

        // static initializer, get/set for properties, add/remove in events
        inState(IN_TYPE).sequence(EnumSet.of(GET, SET), SEMICOLON).createNode(EShallowEntityType.METHOD,
                new Object[] { "empty", 0 }, new PropertyAccessNameResolver<EGenericParserStates>()).endNode();
        inState(IN_TYPE).sequence(EnumSet.of(GET, SET), LBRACE)
                .createNode(EShallowEntityType.METHOD, 0, new PropertyAccessNameResolver<EGenericParserStates>())
                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
        inState(IN_TYPE).sequence(EnumSet.of(GET, SET), DOUBLE_ARROW)
                .createNode(EShallowEntityType.METHOD, 0, new PropertyAccessNameResolver<EGenericParserStates>())
                .parseOnce(IN_METHOD).endNode();
        inState(IN_TYPE).sequence(EnumSet.of(STATIC, ADD, REMOVE), LBRACE).createNode(EShallowEntityType.METHOD, 0)
                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
    }

    /** Creates the rules for parsing events. */
    private void createEventsRules() {
        // common prefix: modifiers, "event", type (with optional generics and
        // explicit interface qualifier), event name
        RecognizerBase<EGenericParserStates> eventRecognizer = inState(TOP_LEVEL, IN_MODULE, IN_TYPE)
                .repeated(getEventModifiers()).markStart().sequence(EVENT).sequence(getValidIdentifiers())
                .skipNested(LT, GT).repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer())
                .sequence(getValidIdentifiers()).createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.EVENT, -1);

        // alternative 1: event with a body in braces
        eventRecognizer.sequence(LBRACE).parseUntil(IN_TYPE).sequence(RBRACE).endNode();
        // alternative 2: event terminated by a semicolon
        eventRecognizer.skipTo(SEMICOLON).endNode();
    }

    /** Creates the rules for parsing properties. */
    private void createPropertiesRules() {
        RecognizerBase<EGenericParserStates> alternatives = typePatternInState(IN_TYPE)
                .repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer()).sequence(getValidIdentifiers());

        // property defined with "=>"
        alternatives.sequence(DOUBLE_ARROW).createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.PROPERTY, -2)
                .parseOnce(IN_METHOD).endNode();

        // property with a body in braces, optionally followed by "=" and an
        // initializer
        RecognizerBase<EGenericParserStates> braceAlternatives = alternatives.sequence(LBRACE)
                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.PROPERTY, -2).parseUntil(IN_TYPE)
                .sequence(RBRACE);
        braceAlternatives.sequence(EQ).parseOnce(IN_METHOD).endNode();
        braceAlternatives.endNode();
    }

    /**
     * Creates the rules for all method-like constructs inside types. The rules
     * are registered in this order: indexers, operator overloading, member
     * methods, constructors, static constructors, destructors.
     */
    private void createMethodRules() {
        // indexers
        completeMethod("indexer", EShallowEntityType.ATTRIBUTE, IN_TYPE,
                typePatternInState(IN_TYPE).repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer())
                        .markStart()
                        // could contain attributes
                        .sequence(THIS, LBRACK).skipToWithNesting(RBRACK, LBRACK, RBRACK));

        // operator overloading
        createOperatorOverloadingRules();

        // methods
        createMemberMethodRules();

        // constructor
        finishConstructorLike(inState(IN_TYPE).repeated(EnumSet.of(PRIVATE, PROTECTED, PUBLIC, INTERNAL)).markStart(),
                SubTypeNames.CONSTRUCTOR);

        // static constructor
        finishConstructorLike(inState(IN_TYPE).sequence(STATIC).markStart(), SubTypeNames.STATIC_CONSTRUCTOR);

        // destructor
        finishConstructorLike(
                inState(IN_TYPE).repeated(EnumSet.of(PRIVATE, PROTECTED, PUBLIC, INTERNAL)).sequence(COMP).markStart(),
                SubTypeNames.DESTRUCTOR);
    }

    /** Creates the rules for member methods. */
    private void createMemberMethodRules() {
        // we have to skip the parameter list with nesting because of tuples (C#7)
        completeMethod("method", EShallowEntityType.METHOD, IN_METHOD,
                typePatternInState(IN_TYPE).repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer())
                        .markStart().sequence(getValidIdentifiers()).skipNested(LT, GT).sequence(LPAREN)
                        .skipToWithNesting(RPAREN, LPAREN, RPAREN));

        // rule that recognizes methods with tuples as return type (C#7)
        completeMethod("method", EShallowEntityType.METHOD, IN_METHOD,
                inState(IN_TYPE).repeated(getTypeAndMemberModifiers()).sequence(LPAREN)
                        .skipToWithNesting(RPAREN, LPAREN, RPAREN)
                        .repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer()).markStart()
                        .sequence(getValidIdentifiers()).skipNested(LT, GT).sequence(LPAREN)
                        .skipToWithNesting(RPAREN, LPAREN, RPAREN));
    }

    /** Creates the rules for operator overloading. */
    private void createOperatorOverloadingRules() {
        EnumSet<ETokenType> primitiveOrIdentifier = EnumSet.copyOf(PRIMITIVE_TYPES);
        primitiveOrIdentifier.add(IDENTIFIER);

        // conversion operators: "implicit operator T(...)" / "explicit operator T(...)"
        completeMethod(SubTypeNames.OPERATOR, EShallowEntityType.METHOD, IN_METHOD,
                inState(IN_TYPE).repeated(EnumSet.of(PRIVATE, PROTECTED, PUBLIC, INTERNAL, STATIC))
                        .sequence(EnumSet.of(IMPLICIT, EXPLICIT), OPERATOR).markStart()
                        .sequence(primitiveOrIdentifier, LPAREN).skipToWithNesting(RPAREN, LPAREN, RPAREN));

        // all other operators: return type, "operator", operator or keyword token
        completeMethod(SubTypeNames.OPERATOR, EShallowEntityType.METHOD, IN_METHOD,
                typePatternInState(IN_TYPE).sequence(OPERATOR).markStart()
                        .sequence(EnumSet.of(ETokenClass.OPERATOR, ETokenClass.KEYWORD), LPAREN)
                        .skipToWithNesting(RPAREN, LPAREN, RPAREN));
    }

    /**
     * Finishes a recognizer that begins parsing a constructor like method. Those
     * are constructors, static constructors and destructors. The corresponding
     * subtype must be passed.
     *
     * @param recognizer
     *            the recognizer matching everything in front of the constructor
     *            name
     * @param subtype
     *            the subtype used for the created method nodes
     */
    private void finishConstructorLike(RecognizerBase<EGenericParserStates> recognizer, String subtype) {
        // NOTE(review): the parameter list is skipped with a plain
        // skipTo(RPAREN), whereas the member method rules use skipToWithNesting
        // because of tuples (C#7) - confirm that tuple typed constructor
        // parameters are handled as intended.
        RecognizerBase<EGenericParserStates> alternative = recognizer.sequence(getValidIdentifiers(), LPAREN)
                .skipTo(RPAREN).skipBeforeWithNesting(EnumSet.of(LBRACE, DOUBLE_ARROW), LPAREN, RPAREN);

        // "=>" variant, either followed by a block in braces or a single
        // statement
        RecognizerBase<EGenericParserStates> lambdaAlternative = alternative.sequence(DOUBLE_ARROW)
                .createNode(EShallowEntityType.METHOD, subtype, 0);
        lambdaAlternative.sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE).endNode();
        lambdaAlternative.parseOnce(IN_METHOD).endNode();

        // regular variant with a body in braces
        alternative.sequence(LBRACE).createNode(EShallowEntityType.METHOD, subtype, 0).parseUntil(IN_METHOD)
                .sequence(RBRACE).endNode();
    }

    /**
     * Creates a new recognizer that can match an explicit interface qualifier
     * prefix for a method-like construct. This includes sequences of identifiers
     * with dots, possibly intermixed with template arguments.
     */
    private RecognizerBase<EGenericParserStates> createExplicitInterfaceQualifierRecognizer() {
        // remember the start of the recognizer chain (we can not use the
        // result of the method chain, as this would be the last recognizer)
        return createRecognizer(start -> start.sequence(getValidIdentifiers()).skipNested(LT, GT).sequence(DOT));
    }

    /**
     * Completes a method-like construct. This begins with searching for the first
     * semicolon or brace, i.e., the parameter list should already be skipped. This
     * ends either in a complete method with a body, or with a semicolon and thus is
     * just an abstract method.
     *
     * @param name
     *            the subtype of the created node; prefixed with "abstract " for
     *            the semicolon variant
     * @param nodeType
     *            the entity type of the created node
     * @param subParseState
     *            the state used for parsing a body in braces
     * @param start
     *            the recognizer matching everything up to and including the
     *            parameter list
     */
    private static void completeMethod(String name, EShallowEntityType nodeType, EGenericParserStates subParseState,
            RecognizerBase<EGenericParserStates> start) {
        RecognizerBase<EGenericParserStates> alternative = start
                .skipBefore(EnumSet.of(LBRACE, SEMICOLON, DOUBLE_ARROW));

        // for lambdas, we always parse IN_METHOD and ignore the subParseState
        RecognizerBase<EGenericParserStates> lambdaAlternative = alternative.sequence(DOUBLE_ARROW).createNode(nodeType,
                name, 0);
        lambdaAlternative.sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE).endNode();
        lambdaAlternative.parseOnce(IN_METHOD).endNode();

        alternative.sequence(LBRACE).createNode(nodeType, name, 0).parseUntil(subParseState).sequence(RBRACE).endNode();
        alternative.sequence(SEMICOLON).createNode(nodeType, "abstract " + name, 0).endNode();
    }

    /**
     * {@inheritDoc}
     * <p>
     * Registers the rules for local functions before the inherited statement
     * rules.
     */
    @Override
    protected void createStatementRules() {
        createLocalFunctionRules();
        super.createStatementRules();
    }

    /**
     * Creates rules for handling of local functions.
     *
     * @see <a href=
     *      "https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/local-functions">Local
     *      functions (C# Programming Guide)</a>
     */
    private void createLocalFunctionRules() {
        EnumSet<ETokenType> typeStart = EnumSet.copyOf(PRIMITIVE_TYPES);
        typeStart.addAll(getValidIdentifiers());
        inState(IN_METHOD)
                // method declarations do never start with AWAIT. That is a function call (e.g.,
                // await foo(); )
                .notPreCondition(createRecognizer(start -> start.sequence(ETokenType.AWAIT)))
                .optional(EnumSet.of(ASYNC, UNSAFE)).sequence(typeStart).optional(QUESTION).repeated(DOT, typeStart)
                .skipNested(LT, GT).repeated(MULT).repeatedSubRecognizer(new ArrayBracketsRecognizer()).markStart()
                .sequence(getValidIdentifiers(), LPAREN)
                .createNode(EShallowEntityType.METHOD, SubTypeNames.LOCAL_FUNCTION, 0).skipTo(LBRACE)
                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();

        // tuple variant
        inState(IN_METHOD).optional(EnumSet.of(ASYNC, UNSAFE)).sequence(LPAREN)
                .skipToWithNesting(RPAREN, LPAREN, RPAREN).markStart().sequence(getValidIdentifiers(), LPAREN)
                .createNode(EShallowEntityType.METHOD, SubTypeNames.LOCAL_FUNCTION, 0).skipTo(LBRACE)
                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
    }

    /** {@inheritDoc} */
    @Override
    protected void createCaseRule() {
        super.createCaseRule();

        // C# also allows any kind of constant expression as a case label, e.g:
        // Foo.BAR + Foo.GOO << 12
        inState(IN_METHOD).markStart().sequence(CASE).skipTo(COLON).createNode(EShallowEntityType.META, 0).endNode();
    }

    /**
     * {@inheritDoc}
     * <p>
     * Also returns all contextual keywords, as they are valid identifiers in the
     * language. See http://msdn.microsoft.com/en-us/library/x53a06bb.aspx for the
     * full list.
     * <p>
     * A copy of {@link #VALID_IDENTIFIERS} is returned on every call. The other
     * token set getters of this class also return new sets, and
     * {@link #getTypeAndMemberModifiers()} modifies such a result in place; a
     * caller doing the same with the result of this method must not alter the
     * shared constant.
     */
    @Override
    protected EnumSet<ETokenType> getValidIdentifiers() {
        return EnumSet.copyOf(VALID_IDENTIFIERS);
    }

    /** {@inheritDoc} */
    @Override
    protected EnumSet<ETokenType> getSimpleBlockKeywordsWithParentheses() {
        return EnumSet.of(WHILE, FOR, SWITCH, LOCK, USING, FIXED, FOREACH);
    }

    /** {@inheritDoc} */
    @Override
    protected EnumSet<ETokenType> getSimpleBlockKeywordsWithoutParentheses() {
        return EnumSet.of(ELSE, FINALLY, CHECKED, UNCHECKED, UNSAFE);
    }

    /** {@inheritDoc} */
    @Override
    protected EnumSet<ETokenType> getStatementStartTokens() {
        // literals are necessary for statements within arrow methods
        return EnumSet.of(NEW, BREAK, CONTINUE, RETURN, ASSERT, CONST, GOTO, BASE, THROW, THIS, CHECKED, SIZEOF,
                STACKALLOC, TYPEOF, VALUE, YIELD, LPAREN, PLUSPLUS, MINUSMINUS, NOT, PLUS, MINUS, COMP, TRUE, FALSE,
                INTEGER_LITERAL, FLOATING_POINT_LITERAL, STRING_LITERAL, IDENTIFIER);
    }

    /** {@inheritDoc} */
    @Override
    protected RecognizerBase<EGenericParserStates> typePattern(RecognizerBase<EGenericParserStates> currentState) {
        EnumSet<ETokenType> modifierKeywords = getTypeAndMemberModifiers();
        EnumSet<ETokenType> typeStart = EnumSet.copyOf(PRIMITIVE_TYPES);
        typeStart.addAll(getValidIdentifiers());

        // we include "?" in the skipping section to deal with nullable types
        // (e.g. Foo? foo;)
        // the repeated (DOT, typeStart) is used for full qualified type names
        return currentState.repeated(modifierKeywords).sequence(typeStart).repeated(DOT, typeStart).skipNested(LT, GT)
                .repeated(EnumSet.of(QUESTION, MULT)).repeatedSubRecognizer(new ArrayBracketsRecognizer());
    }

    /** {@inheritDoc} */
    @Override
    protected void createSubExpressionRules() {
        // anonymous delegate methods
        inState(IN_EXPRESSION).sequence(DELEGATE, LPAREN)
                .createNode(EShallowEntityType.METHOD, SubTypeNames.ANONYMOUS_METHOD)
                .skipToWithNesting(RPAREN, LPAREN, RPAREN).sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE)
                .endNode();

        createLambdaWithArrowRules(DOUBLE_ARROW);
    }

    /** {@inheritDoc} */
    @Override
    protected RecognizerBase<EGenericParserStates> getSubExpressionRecognizer() {
        return new CsDelegateAndLambdaRecognizer();
    }

    /**
     * {@inheritDoc}
     * <p>
     * In addition to the tokens filtered by the base class, "#pragma warning"
     * preprocessor directives are filtered.
     */
    @Override
    protected boolean isFilteredToken(IToken token, IToken previousToken) {
        return super.isFilteredToken(token, previousToken) || isPragmaWarning(token);
    }

    /**
     * Returns whether the given token is a preprocessor directive whose text
     * starts with "#pragma warning".
     */
    private static boolean isPragmaWarning(IToken token) {
        return token.getType() == ETokenType.PREPROCESSOR_DIRECTIVE && token.getText().startsWith("#pragma warning");
    }

}