/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package eu.cqse.check.framework.shallowparser.languages.rust;

import static eu.cqse.check.framework.scanner.ETokenType.AND;
import static eu.cqse.check.framework.scanner.ETokenType.ARROW;
import static eu.cqse.check.framework.scanner.ETokenType.ATTRIBUTE_DIRECTIVE;
import static eu.cqse.check.framework.scanner.ETokenType.BOOLEAN_LITERAL;
import static eu.cqse.check.framework.scanner.ETokenType.BREAK;
import static eu.cqse.check.framework.scanner.ETokenType.CHARACTER_LITERAL;
import static eu.cqse.check.framework.scanner.ETokenType.COLON;
import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
import static eu.cqse.check.framework.scanner.ETokenType.CONST;
import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE;
import static eu.cqse.check.framework.scanner.ETokenType.CRATE;
import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE_ARROW;
import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
import static eu.cqse.check.framework.scanner.ETokenType.ENUM;
import static eu.cqse.check.framework.scanner.ETokenType.EXTERN;
import static eu.cqse.check.framework.scanner.ETokenType.FLOATING_POINT_LITERAL;
import static eu.cqse.check.framework.scanner.ETokenType.FN;
import static eu.cqse.check.framework.scanner.ETokenType.FOR;
import static eu.cqse.check.framework.scanner.ETokenType.GT;
import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
import static eu.cqse.check.framework.scanner.ETokenType.IF;
import static eu.cqse.check.framework.scanner.ETokenType.IMPL;
import static eu.cqse.check.framework.scanner.ETokenType.INTEGER_LITERAL;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LET;
import static eu.cqse.check.framework.scanner.ETokenType.LIFETIME;
import static eu.cqse.check.framework.scanner.ETokenType.LOOP;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.LT;
import static eu.cqse.check.framework.scanner.ETokenType.MACRO_RULES;
import static eu.cqse.check.framework.scanner.ETokenType.MATCH;
import static eu.cqse.check.framework.scanner.ETokenType.MINUS;
import static eu.cqse.check.framework.scanner.ETokenType.MOD;
import static eu.cqse.check.framework.scanner.ETokenType.MULT;
import static eu.cqse.check.framework.scanner.ETokenType.MUT;
import static eu.cqse.check.framework.scanner.ETokenType.NOT;
import static eu.cqse.check.framework.scanner.ETokenType.OR;
import static eu.cqse.check.framework.scanner.ETokenType.OROR;
import static eu.cqse.check.framework.scanner.ETokenType.PUB;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.REF;
import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SELF;
import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
import static eu.cqse.check.framework.scanner.ETokenType.STATIC;
import static eu.cqse.check.framework.scanner.ETokenType.STRING_LITERAL;
import static eu.cqse.check.framework.scanner.ETokenType.STRUCT;
import static eu.cqse.check.framework.scanner.ETokenType.SUPER;
import static eu.cqse.check.framework.scanner.ETokenType.TRAIT;
import static eu.cqse.check.framework.scanner.ETokenType.TYPE;
import static eu.cqse.check.framework.scanner.ETokenType.UNSAFE;
import static eu.cqse.check.framework.scanner.ETokenType.USE;
import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_ENUM;
import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_EXPRESSION;
import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_LAMBDA;
import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_MATCH;
import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_METHOD;
import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_MODULE;
import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_STRUCT;
import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_TRAIT;
import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_TUPLE_STRUCT;

import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;

import org.conqat.lib.commons.region.Region;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
import eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates;

/**
 * A shallow parser for the Rust language.
 *
 * All grammar rules are registered once in the constructor. Each
 * <code>create...Rules</code> method contributes the rules for one group of
 * constructs (module-level entities, methods, enums, traits, structs, meta
 * entities, statements and lambdas).
 */
public class RustShallowParser extends ShallowParserBase<ERustParserStates> {

    /** All possible states of the RustShallowParser. */
    public enum ERustParserStates {

        /** State for parsing module-level entities. */
        IN_MODULE,

        /** State for parsing struct members. */
        IN_STRUCT,

        /** State for parsing tuple-struct members. */
        IN_TUPLE_STRUCT,

        /** State for parsing trait and implementation members. */
        IN_TRAIT,

        /** State for parsing enum members. */
        IN_ENUM,

        /** State for parsing within a method. */
        IN_METHOD,

        /** State for parsing within a match statement. */
        IN_MATCH,

        /** State for parsing sub-expressions. */
        IN_EXPRESSION,

        /** State for parsing within a single lambda expression. */
        IN_LAMBDA,
    }

    /** All token types that may start a match clause. */
    private static final EnumSet<ETokenType> MATCH_CLAUSE_START_TYPES = EnumSet.of(CHARACTER_LITERAL, STRING_LITERAL,
            INTEGER_LITERAL, FLOATING_POINT_LITERAL, BOOLEAN_LITERAL, IDENTIFIER, LPAREN, REF, MUT);

    /**
     * All token types that may start a simple statement. The static
     * initializer below additionally adds all
     * {@link #MATCH_CLAUSE_START_TYPES} to this set.
     */
    private static final EnumSet<ETokenType> STATEMENT_START_TOKENS = EnumSet.of(RETURN, CONTINUE, BREAK, SUPER, MULT,
            AND, NOT, MINUS, LT, SELF);

    /** A list of opening parentheses and brackets. */
    private static final List<ETokenType> OPENING_PARENS = Arrays.asList(LPAREN, LBRACK);

    /** A list of closing parentheses and brackets. */
    private static final List<ETokenType> CLOSING_PARENS = Arrays.asList(RPAREN, RBRACK);

    /** A list of opening parentheses, brackets and braces. */
    private static final List<ETokenType> OPENING_BRACES = Arrays.asList(LPAREN, LBRACK, LBRACE);

    /** A list of closing parentheses, brackets and braces. */
    private static final List<ETokenType> CLOSING_BRACES = Arrays.asList(RPAREN, RBRACK, RBRACE);

    static {
        // all tokens that start a match clause can start a simple statement as
        // well
        STATEMENT_START_TOKENS.addAll(MATCH_CLAUSE_START_TYPES);
    }

    /**
     * Constructor. Starts parsing in {@link ERustParserStates#IN_MODULE} and
     * registers all rules.
     */
    public RustShallowParser() {
        super(ERustParserStates.class, ERustParserStates.IN_MODULE);

        // NOTE(review): the registration order is kept exactly as in the
        // original code; presumably earlier rules take precedence over later
        // ones (e.g. the associated-type rule of createTraitRules() versus the
        // type-alias rule of createMetaRules(), which both match
        // "type <identifier>") - confirm against ShallowParserBase.
        createModuleRules();
        createMethodRules();
        createEnumRules();
        createTraitRules();
        createStructRules();
        createMetaRules();
        createStatementRules();
        createLambdaRules();
    }

    /**
     * Create rules for parsing module level entities like modules, enums,
     * impls, traits and global variables.
     */
    private void createModuleRules() {
        createModuleLevelRule(IN_MODULE, EShallowEntityType.MODULE, MOD, SubTypeNames.MODULE);
        createModuleLevelTypeRule(IN_TRAIT, IMPL, SubTypeNames.IMPLEMENTATION);
        createModuleLevelTypeRule(IN_TRAIT, TRAIT, 0);
        createModuleLevelTypeRule(IN_ENUM, ENUM, 0);
        createModuleStructRules();

        // rule for global variables
        createVariableRules(IN_MODULE, EnumSet.of(CONST, STATIC), EShallowEntityType.ATTRIBUTE,
                SubTypeNames.GLOBAL_VARIABLE);
    }

    /** Create rules for parsing unit, tuple and normal structs. */
    private void createModuleStructRules() {
        // common prefix of all three struct kinds: everything up to (but not
        // including) the token that distinguishes them
        RecognizerBase<ERustParserStates> structRule = inState(IN_MODULE).optional(PUB).markStart()
                .sequence(STRUCT, IDENTIFIER).skipBefore(EnumSet.of(SEMICOLON, LPAREN, LBRACE));
        structRule.sequence(SEMICOLON).createNode(EShallowEntityType.TYPE, SubTypeNames.UNIT_STRUCT, 1).endNode();
        finishStructRule(structRule, IN_STRUCT, SubTypeNames.STRUCT, LBRACE, RBRACE);
        finishStructRule(structRule, IN_TUPLE_STRUCT, SubTypeNames.TUPLE_STRUCT, LPAREN, RPAREN);
    }

    /**
     * Finishes the given struct rule. The struct's members are enclosed in the
     * given opening and closing tokens and are parsed in the given sub-state.
     * The created struct entity will have the given sub-type. A semicolon
     * after the closing token is optional.
     */
    private static void finishStructRule(RecognizerBase<ERustParserStates> structRule, ERustParserStates subState,
            String subType, ETokenType openingToken, ETokenType closingToken) {
        structRule.sequence(openingToken).createNode(EShallowEntityType.TYPE, subType, 1).parseUntil(subState)
                .sequence(closingToken).optional(SEMICOLON).endNode();
    }

    /**
     * Creates a rule for parsing a module-level type. The type is declared by
     * the given keyword and the created entity will have the given sub-type.
     * The members of the type are parsed in the given sub-state.
     */
    private void createModuleLevelTypeRule(ERustParserStates subState, ETokenType keyword, Object subType) {
        createModuleLevelRule(subState, EShallowEntityType.TYPE, keyword, subType);
    }

    /**
     * Creates a rule for parsing a module-level entity. The entity is declared
     * by the given keyword and the created shallow entity will have the given
     * type and sub-type. The members of the entity are parsed in the given
     * sub-state. Any number of leading <code>pub</code>/<code>unsafe</code>
     * modifiers and a generic parameter list directly after the keyword are
     * accepted.
     */
    private void createModuleLevelRule(ERustParserStates subState, EShallowEntityType type, ETokenType keyword,
            Object subType) {
        // NOTE(review): the name index -1 presumably refers to the last
        // matched token (the identifier) - confirm against
        // RecognizerBase#createNode.
        inState(IN_MODULE).repeated(EnumSet.of(PUB, UNSAFE)).markStart().sequence(keyword).skipNested(LT, GT)
                .sequence(IDENTIFIER).createNode(type, subType, -1).skipTo(LBRACE).parseUntil(subState)
                .sequence(RBRACE).endNode();
    }

    /** Create rules for parsing method declarations and definitions. */
    private void createMethodRules() {
        RecognizerBase<ERustParserStates> methodRule = inState(IN_MODULE, IN_TRAIT, IN_METHOD)
                .repeated(EnumSet.of(PUB, UNSAFE)).markStart().sequence(FN, IDENTIFIER)
                .skipBefore(EnumSet.of(LBRACE, SEMICOLON));
        // a terminating semicolon marks a body-less declaration
        methodRule.sequence(SEMICOLON).createNode(EShallowEntityType.METHOD, SubTypeNames.FUNCTION_DECLARATION, 1)
                .endNode();
        // an opening brace marks a definition whose body is parsed as method
        methodRule.sequence(LBRACE).createNode(EShallowEntityType.METHOD, SubTypeNames.FUNCTION, 1)
                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
    }

    /** Creates a rule for parsing enum literals. */
    private void createEnumRules() {
        inState(IN_ENUM).sequence(IDENTIFIER).createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ENUM_LITERAL, 0)
                .skipBeforeWithNesting(EnumSet.of(COMMA, RBRACE), OPENING_BRACES, CLOSING_BRACES,
                        getSubExpressionRecognizer())
                .optional(COMMA).endNode();
    }

    /** Creates a rule for parsing associated types in traits. */
    private void createTraitRules() {
        inState(IN_TRAIT).sequence(TYPE, IDENTIFIER)
                .createNode(EShallowEntityType.META, SubTypeNames.ASSOCIATED_TYPE, 1).skipTo(SEMICOLON).endNode();
    }

    /** Creates rules for parsing members of structs and tuple structs. */
    private void createStructRules() {
        createStructRule(IN_STRUCT, 0);
        // members of tuple structs get no name
        createStructRule(IN_TUPLE_STRUCT, null);
    }

    /**
     * Create a rule for parsing a member of a struct. The rule starts in the
     * given state and creates an entity that will have the given name.
     */
    private void createStructRule(ERustParserStates state, Object name) {
        // angle brackets are part of the nesting tokens here so that commas
        // within generic type arguments do not end the member
        inState(state).repeated(EnumSet.of(PUB, MUT)).markStart().sequence(IDENTIFIER)
                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ATTRIBUTE, name)
                .skipBeforeWithNesting(EnumSet.of(COMMA, RPAREN, RBRACE), Arrays.asList(LPAREN, LBRACK, LBRACE, LT),
                        Arrays.asList(RPAREN, RBRACK, RBRACE, GT))
                .optional(COMMA).endNode();
    }

    /**
     * Creates rules for parsing meta entities like use and extern
     * declarations, macros, type aliases, attributes and labels.
     */
    private void createMetaRules() {
        inAnyState().optional(PUB).markStart().sequence(USE).createNode(EShallowEntityType.META, 0).skipTo(SEMICOLON)
                .endNode();

        inAnyState().sequence(EXTERN, CRATE, IDENTIFIER).createNode(EShallowEntityType.META, new int[] { 0, 1 }, 2)
                .skipTo(SEMICOLON).endNode();

        inAnyState().sequence(ATTRIBUTE_DIRECTIVE)
                .createNode(EShallowEntityType.META, SubTypeNames.ATTRIBUTE_DIRECTIVE).endNode();

        inAnyState().sequence(TYPE, IDENTIFIER).createNode(EShallowEntityType.META, SubTypeNames.TYPE_ALIAS, 1)
                .skipTo(SEMICOLON).endNode();

        inAnyState().sequence(LIFETIME, COLON).createNode(EShallowEntityType.META, SubTypeNames.LABEL, 0).endNode();

        inAnyState().sequence(MACRO_RULES, IDENTIFIER).createNode(EShallowEntityType.META, SubTypeNames.MACRO, 1)
                .skipNested(LBRACE, RBRACE).endNode();
    }

    /** Creates rules for parsing statements within methods. */
    private void createStatementRules() {
        createBlockRule(SubTypeNames.ANONYMOUS_BLOCK, LBRACE);
        createBlockRule(SubTypeNames.UNSAFE_BLOCK, UNSAFE, LBRACE);

        createIfRules();
        createLoopRules();
        createMatchRules();
        createVariableRules(IN_METHOD, EnumSet.of(LET, STATIC, CONST), EShallowEntityType.STATEMENT,
                SubTypeNames.LOCAL_VARIABLE);
        createSimpleStatementRules();
    }

    /**
     * Creates a rule for parsing a block. The block must start with the given
     * sequence and will have the given sub type.
     */
    private void createBlockRule(String subType, Object... sequence) {
        inState(IN_METHOD, IN_EXPRESSION).sequence(sequence).createNode(EShallowEntityType.STATEMENT, subType)
                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
    }

    /** Creates rules for parsing if/else-if/else constructs. */
    private void createIfRules() {
        createIfRule(true, IF);
        createIfRule(true, ELSE, IF);
        // a plain else always terminates the construct
        createIfRule(false, ELSE);
    }

    /**
     * Creates a rule that matches an if construct that starts with the given
     * sequence. If continued is true, the created entity can be continued by
     * an else token; otherwise the entity always ends after its block.
     */
    private void createIfRule(boolean continued, Object... ifSequence) {
        RecognizerBase<ERustParserStates> ifRule = inState(IN_METHOD, IN_EXPRESSION, IN_LAMBDA).sequence(ifSequence)
                .skipToWithNesting(LBRACE, OPENING_PARENS, CLOSING_PARENS, getSubExpressionRecognizer())
                .createNode(EShallowEntityType.STATEMENT, new Region(0, ifSequence.length - 1))
                .parseUntil(IN_METHOD).sequence(RBRACE);
        if (continued) {
            endWithPossibleContinuation(ifRule, EnumSet.of(ELSE));
        } else {
            ifRule.endNode();
        }
    }

    /** Creates rules for parsing loops. */
    private void createLoopRules() {
        // NOTE(review): the original comment here only read "sub expressions,
        // while let". Unlike the if and match rules, the loop header is
        // skipped without the sub-expression recognizer and the rule is only
        // registered for IN_METHOD; presumably sub-expressions in loop headers
        // and "while let" are not specially handled yet - confirm.
        inState(IN_METHOD).sequence(EnumSet.of(LOOP, FOR, WHILE))
                .skipToWithNesting(LBRACE, OPENING_PARENS, CLOSING_PARENS).createNode(EShallowEntityType.STATEMENT, 0)
                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
    }

    /** Creates rules for parsing match statements and their clauses. */
    private void createMatchRules() {
        inState(IN_METHOD, IN_EXPRESSION, IN_LAMBDA).sequence(MATCH)
                .skipToWithNesting(LBRACE, OPENING_PARENS, CLOSING_PARENS, getSubExpressionRecognizer())
                .createNode(EShallowEntityType.STATEMENT, 0).parseUntil(IN_MATCH).sequence(RBRACE).endNode();

        RecognizerBase<ERustParserStates> clauseRule = inState(IN_MATCH).sequence(MATCH_CLAUSE_START_TYPES)
                .skipTo(DOUBLE_ARROW).createNode(EShallowEntityType.META, SubTypeNames.MATCH_CLAUSE);
        // clause body is either a braced block ...
        clauseRule.sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE).optional(COMMA).endNode();
        // ... or a single statement
        clauseRule.parseOnce(IN_METHOD).optional(COMMA).endNode();
    }

    /**
     * Creates rules for parsing variable declarations in the given state. The
     * declaration must start with one of the given keywords. The created
     * entity will have the given type and sub-type.
     */
    private void createVariableRules(ERustParserStates state, EnumSet<ETokenType> keywords, EShallowEntityType type,
            String subType) {
        RecognizerBase<ERustParserStates> rule = inState(state).optional(PUB).sequence(keywords)
                .repeated(EnumSet.of(MUT, REF));
        // declaration of a plain identifier: the identifier is used as name
        finishVariableRule(rule.markStart().sequence(IDENTIFIER), type, subType, 0);
        // anything else after the keyword: the entity gets no name
        finishVariableRule(rule, type, subType, null);
    }

    /**
     * Finishes the given variable declaration rule with the given type,
     * sub-type and name. The declaration extends to the next semicolon outside
     * of nested parentheses, brackets and braces.
     */
    private static void finishVariableRule(RecognizerBase<ERustParserStates> rule, EShallowEntityType type,
            String subType, Object name) {
        rule.createNode(type, subType, name)
                .skipToWithNesting(SEMICOLON, OPENING_BRACES, CLOSING_BRACES, getSubExpressionRecognizer())
                .endNode();
    }

    /** Creates rules for parsing simple and empty statements. */
    private void createSimpleStatementRules() {
        createSimpleStatementRule(IN_METHOD, SubTypeNames.SIMPLE_STATEMENT, true);

        inState(IN_METHOD).sequence(SEMICOLON).createNode(EShallowEntityType.STATEMENT, SubTypeNames.EMPTY_STATEMENT)
                .endNode();
    }

    /**
     * Creates a rule for parsing simple statements in the given state. The
     * created entity will have the given sub-type. If the statement is
     * finished by a semicolon and consumeSemicolon is true, the rule will
     * consume it, otherwise it will be left for outer rules (e. g. in lambda
     * expressions).
     */
    private void createSimpleStatementRule(ERustParserStates state, String subType, boolean consumeSemicolon) {
        RecognizerBase<ERustParserStates> statementRule = inState(state).sequence(STATEMENT_START_TOKENS)
                .createNode(EShallowEntityType.STATEMENT, subType, 0)
                .skipBeforeWithNesting(EnumSet.of(SEMICOLON, COMMA, RBRACE, RPAREN), OPENING_BRACES, CLOSING_BRACES,
                        getSubExpressionRecognizer());
        if (consumeSemicolon) {
            statementRule = statementRule.optional(SEMICOLON);
        }
        statementRule.endNode();
    }

    /** Creates rules for parsing lambda expressions. */
    private void createLambdaRules() {
        // lambda with parameters: |a, b| ...
        continueLambdaRule(inState(IN_EXPRESSION).sequence(OR).skipTo(OR));
        // lambda without parameters: || ...
        continueLambdaRule(inState(IN_EXPRESSION).sequence(OROR));

        createSimpleStatementRule(IN_LAMBDA, SubTypeNames.LAMBDA_EXPRESSION, false);
    }

    /**
     * Continues the given lambda rule that has already matched the parameter
     * specification. The lambda body is either a braced block (optionally
     * preceded by an arrow and everything up to the opening brace) or a single
     * entity parsed in the {@link ERustParserStates#IN_LAMBDA} state.
     */
    private static void continueLambdaRule(RecognizerBase<ERustParserStates> lambdaRule) {
        RecognizerBase<ERustParserStates> lambdaNode = lambdaRule.createNode(EShallowEntityType.METHOD,
                SubTypeNames.LAMBDA);
        finishLambdaRule(lambdaNode.sequence(LBRACE));
        finishLambdaRule(lambdaNode.sequence(ARROW).skipTo(LBRACE));
        lambdaNode.parseOnce(IN_LAMBDA).endNode();
    }

    /**
     * Finishes the given lambda rule. The rule must already have matched the
     * opening brace.
     */
    private static void finishLambdaRule(RecognizerBase<ERustParserStates> lambdaRule) {
        lambdaRule.parseUntil(IN_METHOD).sequence(RBRACE).endNode();
    }

    /**
     * Returns a recognizer for parsing sub-expressions like lambdas and if,
     * match and block expressions. A new recognizer instance is created on
     * every call.
     */
    private static RecognizerBase<ERustParserStates> getSubExpressionRecognizer() {
        return new RustSubExpressionRecognizer();
    }
}