001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.rust;
018
019import static eu.cqse.check.framework.scanner.ETokenType.AND;
020import static eu.cqse.check.framework.scanner.ETokenType.ARROW;
021import static eu.cqse.check.framework.scanner.ETokenType.ATTRIBUTE_DIRECTIVE;
022import static eu.cqse.check.framework.scanner.ETokenType.BOOLEAN_LITERAL;
023import static eu.cqse.check.framework.scanner.ETokenType.BREAK;
024import static eu.cqse.check.framework.scanner.ETokenType.CHARACTER_LITERAL;
025import static eu.cqse.check.framework.scanner.ETokenType.COLON;
026import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
027import static eu.cqse.check.framework.scanner.ETokenType.CONST;
028import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE;
029import static eu.cqse.check.framework.scanner.ETokenType.CRATE;
030import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE_ARROW;
031import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
032import static eu.cqse.check.framework.scanner.ETokenType.ENUM;
033import static eu.cqse.check.framework.scanner.ETokenType.EXTERN;
034import static eu.cqse.check.framework.scanner.ETokenType.FLOATING_POINT_LITERAL;
035import static eu.cqse.check.framework.scanner.ETokenType.FN;
036import static eu.cqse.check.framework.scanner.ETokenType.FOR;
037import static eu.cqse.check.framework.scanner.ETokenType.GT;
038import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
039import static eu.cqse.check.framework.scanner.ETokenType.IF;
040import static eu.cqse.check.framework.scanner.ETokenType.IMPL;
041import static eu.cqse.check.framework.scanner.ETokenType.INTEGER_LITERAL;
042import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
043import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
044import static eu.cqse.check.framework.scanner.ETokenType.LET;
045import static eu.cqse.check.framework.scanner.ETokenType.LIFETIME;
046import static eu.cqse.check.framework.scanner.ETokenType.LOOP;
047import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
048import static eu.cqse.check.framework.scanner.ETokenType.LT;
049import static eu.cqse.check.framework.scanner.ETokenType.MACRO_RULES;
050import static eu.cqse.check.framework.scanner.ETokenType.MATCH;
051import static eu.cqse.check.framework.scanner.ETokenType.MINUS;
052import static eu.cqse.check.framework.scanner.ETokenType.MOD;
053import static eu.cqse.check.framework.scanner.ETokenType.MULT;
054import static eu.cqse.check.framework.scanner.ETokenType.MUT;
055import static eu.cqse.check.framework.scanner.ETokenType.NOT;
056import static eu.cqse.check.framework.scanner.ETokenType.OR;
057import static eu.cqse.check.framework.scanner.ETokenType.OROR;
058import static eu.cqse.check.framework.scanner.ETokenType.PUB;
059import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
060import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
061import static eu.cqse.check.framework.scanner.ETokenType.REF;
062import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
063import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
064import static eu.cqse.check.framework.scanner.ETokenType.SELF;
065import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
066import static eu.cqse.check.framework.scanner.ETokenType.STATIC;
067import static eu.cqse.check.framework.scanner.ETokenType.STRING_LITERAL;
068import static eu.cqse.check.framework.scanner.ETokenType.STRUCT;
069import static eu.cqse.check.framework.scanner.ETokenType.SUPER;
070import static eu.cqse.check.framework.scanner.ETokenType.TRAIT;
071import static eu.cqse.check.framework.scanner.ETokenType.TYPE;
072import static eu.cqse.check.framework.scanner.ETokenType.UNSAFE;
073import static eu.cqse.check.framework.scanner.ETokenType.USE;
074import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
075import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_ENUM;
076import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_EXPRESSION;
077import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_LAMBDA;
078import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_MATCH;
079import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_METHOD;
080import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_MODULE;
081import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_STRUCT;
082import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_TRAIT;
083import static eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates.IN_TUPLE_STRUCT;
084
085import java.util.Arrays;
086import java.util.EnumSet;
087import java.util.List;
088
089import org.conqat.lib.commons.region.Region;
090
091import eu.cqse.check.framework.scanner.ETokenType;
092import eu.cqse.check.framework.shallowparser.SubTypeNames;
093import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
094import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
095import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
096import eu.cqse.check.framework.shallowparser.languages.rust.RustShallowParser.ERustParserStates;
097
098/**
099 * A shallow parser for the rust language.
100 */
101public class RustShallowParser extends ShallowParserBase<ERustParserStates> {
102
103        /** All possible states of the RustShallowParser. */
104        public static enum ERustParserStates {
105
106                /** State for parsing module-level entites. */
107                IN_MODULE,
108
109                /** State for parsing struct members. */
110                IN_STRUCT,
111
112                /** State for parsing tuple-struct members. */
113                IN_TUPLE_STRUCT,
114
115                /** State for parsing trait and implementation members. */
116                IN_TRAIT,
117
118                /** State for parsing enum members. */
119                IN_ENUM,
120
121                /** State for parsing within a method. */
122                IN_METHOD,
123
124                /** State for parsing within a match statement. */
125                IN_MATCH,
126
127                /** State for parsing sub-expressions. */
128                IN_EXPRESSION,
129
130                /** State for parsing within a single lambda expression. */
131                IN_LAMBDA,
132        }
133
134        /** All token types that may start a match clause. */
135        private static final EnumSet<ETokenType> MATCH_CLAUSE_START_TYPES = EnumSet.of(CHARACTER_LITERAL, STRING_LITERAL,
136                        INTEGER_LITERAL, FLOATING_POINT_LITERAL, BOOLEAN_LITERAL, IDENTIFIER, LPAREN, REF, MUT);
137
138        /** All token types that may start a simple statement. */
139        private static final EnumSet<ETokenType> STATEMENT_START_TOKENS = EnumSet.of(RETURN, CONTINUE, BREAK, SUPER, MULT,
140                        AND, NOT, MINUS, LT, SELF);
141
142        /** A list of opening parenthesis and brackets. */
143        private static final List<ETokenType> OPENING_PARENS = Arrays.asList(LPAREN, LBRACK);
144
145        /** A list of opening parenthesis and brackets. */
146        private static final List<ETokenType> CLOSING_PARENS = Arrays.asList(RPAREN, RBRACK);
147
148        /** A list of opening parenthesis, brackets, angle brackets and braces. */
149        private static final List<ETokenType> OPENING_BRACES = Arrays.asList(LPAREN, LBRACK, LBRACE);
150
151        /** A list of closing parenthesis, brackets and braces. */
152        private static final List<ETokenType> CLOSING_BRACES = Arrays.asList(RPAREN, RBRACK, RBRACE);
153
154        static {
155                // all tokens that start a match clause can start a simple statement as
156                // well
157                STATEMENT_START_TOKENS.addAll(MATCH_CLAUSE_START_TYPES);
158        }
159
160        /** Constructor. */
161        public RustShallowParser() {
162                super(ERustParserStates.class, ERustParserStates.IN_MODULE);
163
164                createModuleRules();
165                createMethodRules();
166                createEnumRules();
167                createTraitRules();
168                createStructRules();
169                createMetaRules();
170                createStatementRules();
171                createLambdaRules();
172        }
173
174        /**
175         * Create rules for parsing module level entities like modules, enums,
176         * impls, traits and global variables.
177         */
178        private void createModuleRules() {
179                createModuleLevelRule(IN_MODULE, EShallowEntityType.MODULE, MOD, SubTypeNames.MODULE);
180                createModuleLevelTypeRule(IN_TRAIT, IMPL, SubTypeNames.IMPLEMENTATION);
181                createModuleLevelTypeRule(IN_TRAIT, TRAIT, 0);
182                createModuleLevelTypeRule(IN_ENUM, ENUM, 0);
183                createModuleStructRules();
184
185                // rule for global variables
186                createVariableRules(IN_MODULE, EnumSet.of(CONST, STATIC), EShallowEntityType.ATTRIBUTE,
187                                SubTypeNames.GLOBAL_VARIABLE);
188        }
189
190        /** Create rules for parsing unit, tuple and normal structs. */
191        private void createModuleStructRules() {
192                RecognizerBase<ERustParserStates> structRule = inState(IN_MODULE).optional(PUB).markStart()
193                                .sequence(STRUCT, IDENTIFIER).skipBefore(EnumSet.of(SEMICOLON, LPAREN, LBRACE));
194                structRule.sequence(SEMICOLON).createNode(EShallowEntityType.TYPE, SubTypeNames.UNIT_STRUCT, 1).endNode();
195                finishStructRule(structRule, IN_STRUCT, SubTypeNames.STRUCT, LBRACE, RBRACE);
196                finishStructRule(structRule, IN_TUPLE_STRUCT, SubTypeNames.TUPLE_STRUCT, LPAREN, RPAREN);
197        }
198
199        /**
200         * Finishes the given struct rule. The struct's members are enclosed in the
201         * given opening and closing tokens and are parsed in the given sub-state.
202         * The created struct entity will have the given sub-type.
203         */
204        private static void finishStructRule(RecognizerBase<ERustParserStates> structRule, ERustParserStates subState,
205                        String subType, ETokenType openingToken, ETokenType closingToken) {
206                structRule.sequence(openingToken).createNode(EShallowEntityType.TYPE, subType, 1).parseUntil(subState)
207                                .sequence(closingToken).optional(SEMICOLON).endNode();
208        }
209
210        /**
211         * Creates a rule for parsing a module-level type. The type is declared by
212         * the given keyword and the created entity will have the given sub-type.
213         * The members of the type are parsed in the given sub-state.
214         */
215        private void createModuleLevelTypeRule(ERustParserStates subState, ETokenType keyword, Object subType) {
216                createModuleLevelRule(subState, EShallowEntityType.TYPE, keyword, subType);
217        }
218
219        /**
220         * Creates a rule for parsing a module-level entity. The entity is declared
221         * by the given keyword and the created shallow entity will have the given
222         * type and sub-type. The members of the entity are parsed in the given
223         * sub-state.
224         */
225        private void createModuleLevelRule(ERustParserStates subState, EShallowEntityType type, ETokenType keyword,
226                        Object subType) {
227                inState(IN_MODULE).repeated(EnumSet.of(PUB, UNSAFE)).markStart().sequence(keyword).skipNested(LT, GT)
228                                .sequence(IDENTIFIER).createNode(type, subType, -1).skipTo(LBRACE).parseUntil(subState).sequence(RBRACE)
229                                .endNode();
230        }
231
232        /** Create rules for parsing methods declarations and definitions. */
233        private void createMethodRules() {
234                RecognizerBase<ERustParserStates> methodRule = inState(IN_MODULE, IN_TRAIT, IN_METHOD)
235                                .repeated(EnumSet.of(PUB, UNSAFE)).markStart().sequence(FN, IDENTIFIER)
236                                .skipBefore(EnumSet.of(LBRACE, SEMICOLON));
237                methodRule.sequence(SEMICOLON).createNode(EShallowEntityType.METHOD, SubTypeNames.FUNCTION_DECLARATION, 1)
238                                .endNode();
239                methodRule.sequence(LBRACE).createNode(EShallowEntityType.METHOD, SubTypeNames.FUNCTION, 1)
240                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
241        }
242
243        /** Creates a rule for parsing enum literals. */
244        private void createEnumRules() {
245                inState(IN_ENUM).sequence(IDENTIFIER).createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ENUM_LITERAL, 0)
246                                .skipBeforeWithNesting(EnumSet.of(COMMA, RBRACE), OPENING_BRACES, CLOSING_BRACES,
247                                                getSubExpressionRecognizer())
248                                .optional(COMMA).endNode();
249        }
250
251        /** Creates a rule for parsing associated types in traits. */
252        private void createTraitRules() {
253                inState(IN_TRAIT).sequence(TYPE, IDENTIFIER)
254                                .createNode(EShallowEntityType.META, SubTypeNames.ASSOCIATED_TYPE, 1).skipTo(SEMICOLON).endNode();
255        }
256
257        /** Creates a rule for parsing members of structs. */
258        private void createStructRules() {
259                createStructRule(IN_STRUCT, 0);
260                createStructRule(IN_TUPLE_STRUCT, null);
261        }
262
263        /**
264         * Create a rule for parsing a member of a struct. The rule start in the
265         * given state and creates an entity, that will have the given name.
266         */
267        private void createStructRule(ERustParserStates state, Object name) {
268                inState(state).repeated(EnumSet.of(PUB, MUT)).markStart().sequence(IDENTIFIER)
269                                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ATTRIBUTE, name)
270                                .skipBeforeWithNesting(EnumSet.of(COMMA, RPAREN, RBRACE), Arrays.asList(LPAREN, LBRACK, LBRACE, LT),
271                                                Arrays.asList(RPAREN, RBRACK, RBRACE, GT))
272                                .optional(COMMA).endNode();
273        }
274
275        /**
276         * Creates rules for parsing meta entities like use and extern declarations,
277         * macros, type aliases, attributes and labels.
278         */
279        private void createMetaRules() {
280                inAnyState().optional(PUB).markStart().sequence(USE).createNode(EShallowEntityType.META, 0).skipTo(SEMICOLON)
281                                .endNode();
282
283                inAnyState().sequence(EXTERN, CRATE, IDENTIFIER).createNode(EShallowEntityType.META, new int[] { 0, 1 }, 2)
284                                .skipTo(SEMICOLON).endNode();
285
286                inAnyState().sequence(ATTRIBUTE_DIRECTIVE).createNode(EShallowEntityType.META, SubTypeNames.ATTRIBUTE_DIRECTIVE)
287                                .endNode();
288
289                inAnyState().sequence(TYPE, IDENTIFIER).createNode(EShallowEntityType.META, SubTypeNames.TYPE_ALIAS, 1)
290                                .skipTo(SEMICOLON).endNode();
291
292                inAnyState().sequence(LIFETIME, COLON).createNode(EShallowEntityType.META, SubTypeNames.LABEL, 0).endNode();
293
294                inAnyState().sequence(MACRO_RULES, IDENTIFIER).createNode(EShallowEntityType.META, SubTypeNames.MACRO, 1)
295                                .skipNested(LBRACE, RBRACE).endNode();
296        }
297
298        /** Creates rules for parsing statements within methods. */
299        private void createStatementRules() {
300                createBlockRule(SubTypeNames.ANONYMOUS_BLOCK, LBRACE);
301                createBlockRule(SubTypeNames.UNSAFE_BLOCK, UNSAFE, LBRACE);
302
303                createIfRules();
304                createLoopRules();
305                createMatchRules();
306                createVariableRules(IN_METHOD, EnumSet.of(LET, STATIC, CONST), EShallowEntityType.STATEMENT,
307                                SubTypeNames.LOCAL_VARIABLE);
308                createSimpleStatementRules();
309        }
310
311        /**
312         * Creates a rule for parsing a block. The block must start with the given
313         * sequence and will have the given sub type.
314         */
315        private void createBlockRule(String subType, Object... sequence) {
316                inState(IN_METHOD, IN_EXPRESSION).sequence(sequence).createNode(EShallowEntityType.STATEMENT, subType)
317                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
318        }
319
320        /** Creates rules for parsing if/else-if/else constructs. */
321        private void createIfRules() {
322                createIfRule(true, IF);
323                createIfRule(true, ELSE, IF);
324                createIfRule(false, ELSE);
325
326        }
327
328        /**
329         * Creates a rule that matches an if construct that starts with the given
330         * sequence. If continued is false, the created entity can be continued by
331         * an else token.
332         */
333        private void createIfRule(boolean continued, Object... ifSequence) {
334                RecognizerBase<ERustParserStates> ifRule = inState(IN_METHOD, IN_EXPRESSION, IN_LAMBDA).sequence(ifSequence)
335                                .skipToWithNesting(LBRACE, OPENING_PARENS, CLOSING_PARENS, getSubExpressionRecognizer())
336                                .createNode(EShallowEntityType.STATEMENT, new Region(0, ifSequence.length - 1)).parseUntil(IN_METHOD)
337                                .sequence(RBRACE);
338                if (continued) {
339                        endWithPossibleContinuation(ifRule, EnumSet.of(ELSE));
340                } else {
341                        ifRule.endNode();
342                }
343        }
344
345        /** Creates rules for parsing loops. */
346        private void createLoopRules() {
347                // sub expressions, while let
348                inState(IN_METHOD).sequence(EnumSet.of(LOOP, FOR, WHILE))
349                                .skipToWithNesting(LBRACE, OPENING_PARENS, CLOSING_PARENS).createNode(EShallowEntityType.STATEMENT, 0)
350                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
351        }
352
353        /** Creates rules for parsing match statements. */
354        private void createMatchRules() {
355                inState(IN_METHOD, IN_EXPRESSION, IN_LAMBDA).sequence(MATCH)
356                                .skipToWithNesting(LBRACE, OPENING_PARENS, CLOSING_PARENS, getSubExpressionRecognizer())
357                                .createNode(EShallowEntityType.STATEMENT, 0).parseUntil(IN_MATCH).sequence(RBRACE).endNode();
358
359                RecognizerBase<ERustParserStates> clauseRule = inState(IN_MATCH).sequence(MATCH_CLAUSE_START_TYPES)
360                                .skipTo(DOUBLE_ARROW).createNode(EShallowEntityType.META, SubTypeNames.MATCH_CLAUSE);
361                clauseRule.sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE).optional(COMMA).endNode();
362                clauseRule.parseOnce(IN_METHOD).optional(COMMA).endNode();
363        }
364
365        /**
366         * Creates rules for parsing variable declarations in the given state. The
367         * declaration must start with one of the given keywords. The created entity
368         * will have the given type and sub-type.
369         */
370        private void createVariableRules(ERustParserStates state, EnumSet<ETokenType> keywords, EShallowEntityType type,
371                        String subType) {
372                RecognizerBase<ERustParserStates> rule = inState(state).optional(PUB).sequence(keywords)
373                                .repeated(EnumSet.of(MUT, REF));
374                finishVariableRule(rule.markStart().sequence(IDENTIFIER), type, subType, 0);
375                finishVariableRule(rule, type, subType, null);
376        }
377
378        /**
379         * Finishes the given variable declaration rule with the given type,
380         * sub-type and name.
381         */
382        private static void finishVariableRule(RecognizerBase<ERustParserStates> rule, EShallowEntityType type,
383                        String subType, Object name) {
384                rule.createNode(type, subType, name)
385                                .skipToWithNesting(SEMICOLON, OPENING_BRACES, CLOSING_BRACES, getSubExpressionRecognizer()).endNode();
386        }
387
388        /** Creates rules for parsing simple and empty statements. */
389        private void createSimpleStatementRules() {
390                createSimpleStatementRule(IN_METHOD, SubTypeNames.SIMPLE_STATEMENT, true);
391
392                inState(IN_METHOD).sequence(SEMICOLON).createNode(EShallowEntityType.STATEMENT, SubTypeNames.EMPTY_STATEMENT)
393                                .endNode();
394        }
395
396        /**
397         * Creates a rule for parsing simple statements in the given state. The
398         * created entity will have the given sub-type. If the statement is finished
399         * by a semicolon and consumeSemicolon is true, the rule will consume it,
400         * otherwise it will be left for outer rules (e. g. in lambda expressions).
401         */
402        private void createSimpleStatementRule(ERustParserStates state, String subType, boolean consumeSemicolon) {
403                RecognizerBase<ERustParserStates> statementRule = inState(state).sequence(STATEMENT_START_TOKENS)
404                                .createNode(EShallowEntityType.STATEMENT, subType, 0)
405                                .skipBeforeWithNesting(EnumSet.of(SEMICOLON, COMMA, RBRACE, RPAREN), OPENING_BRACES, CLOSING_BRACES,
406                                                getSubExpressionRecognizer());
407                if (consumeSemicolon) {
408                        statementRule = statementRule.optional(SEMICOLON);
409                }
410                statementRule.endNode();
411        }
412
413        /** Creates rules for parsing lambda expressions. */
414        private void createLambdaRules() {
415                continueLambdaRule(inState(IN_EXPRESSION).sequence(OR).skipTo(OR));
416                continueLambdaRule(inState(IN_EXPRESSION).sequence(OROR));
417
418                createSimpleStatementRule(IN_LAMBDA, SubTypeNames.LAMBDA_EXPRESSION, false);
419        }
420
421        /**
422         * Continues the given lambda rule that has already matched the parameter
423         * specification.
424         */
425        private static void continueLambdaRule(RecognizerBase<ERustParserStates> lambdaRule) {
426                lambdaRule = lambdaRule.createNode(EShallowEntityType.METHOD, SubTypeNames.LAMBDA);
427                finishLambdaRule(lambdaRule.sequence(LBRACE));
428                finishLambdaRule(lambdaRule.sequence(ARROW).skipTo(LBRACE));
429                lambdaRule.parseOnce(IN_LAMBDA).endNode();
430        }
431
432        /**
433         * Finishes the given lambda rule. The rule must already have matched the
434         * opening brace.
435         */
436        private static void finishLambdaRule(RecognizerBase<ERustParserStates> lambdaRule) {
437                lambdaRule.parseUntil(IN_METHOD).sequence(RBRACE).endNode();
438        }
439
440        /**
441         * Returns a recognizer for parsing sub-expressions like lambdas and if,
442         * match and block expressions.
443         */
444        private static RecognizerBase<ERustParserStates> getSubExpressionRecognizer() {
445                return new RustSubExpressionRecognizer();
446        }
447}