001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright (c) 2009-2017 CQSE GmbH                                        |
004|                                                                          |
005+-------------------------------------------------------------------------*/
006package eu.cqse.check.framework.shallowparser.languages.oscript;
007
008import static eu.cqse.check.framework.scanner.ETokenType.ADDFEATURE;
009import static eu.cqse.check.framework.scanner.ETokenType.AND;
010import static eu.cqse.check.framework.scanner.ETokenType.ASSOC;
011import static eu.cqse.check.framework.scanner.ETokenType.BOOLEAN;
012import static eu.cqse.check.framework.scanner.ETokenType.BREAK;
013import static eu.cqse.check.framework.scanner.ETokenType.BREAKIF;
014import static eu.cqse.check.framework.scanner.ETokenType.BY;
015import static eu.cqse.check.framework.scanner.ETokenType.CASE;
016import static eu.cqse.check.framework.scanner.ETokenType.COLON;
017import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
018import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE;
019import static eu.cqse.check.framework.scanner.ETokenType.CONTINUEIF;
020import static eu.cqse.check.framework.scanner.ETokenType.DATE;
021import static eu.cqse.check.framework.scanner.ETokenType.DEFAULT;
022import static eu.cqse.check.framework.scanner.ETokenType.DEFINE;
023import static eu.cqse.check.framework.scanner.ETokenType.DO;
024import static eu.cqse.check.framework.scanner.ETokenType.DOLLAR;
025import static eu.cqse.check.framework.scanner.ETokenType.DOT;
026import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE_COLON;
027import static eu.cqse.check.framework.scanner.ETokenType.DOWNTO;
028import static eu.cqse.check.framework.scanner.ETokenType.DYNAMIC;
029import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
030import static eu.cqse.check.framework.scanner.ETokenType.ELSEIF;
031import static eu.cqse.check.framework.scanner.ETokenType.END;
032import static eu.cqse.check.framework.scanner.ETokenType.ENDIF;
033import static eu.cqse.check.framework.scanner.ETokenType.ENDSCRIPT;
034import static eu.cqse.check.framework.scanner.ETokenType.EOL;
035import static eu.cqse.check.framework.scanner.ETokenType.EQUAL;
036import static eu.cqse.check.framework.scanner.ETokenType.FINAL;
037import static eu.cqse.check.framework.scanner.ETokenType.FOR;
038import static eu.cqse.check.framework.scanner.ETokenType.FUNCTION;
039import static eu.cqse.check.framework.scanner.ETokenType.GE;
040import static eu.cqse.check.framework.scanner.ETokenType.GOTO;
041import static eu.cqse.check.framework.scanner.ETokenType.GT;
042import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
043import static eu.cqse.check.framework.scanner.ETokenType.IF;
044import static eu.cqse.check.framework.scanner.ETokenType.IFDEF;
045import static eu.cqse.check.framework.scanner.ETokenType.IFNDEF;
046import static eu.cqse.check.framework.scanner.ETokenType.IN;
047import static eu.cqse.check.framework.scanner.ETokenType.INHERITS;
048import static eu.cqse.check.framework.scanner.ETokenType.INTEGER;
049import static eu.cqse.check.framework.scanner.ETokenType.INTERFACE;
050import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
051import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
052import static eu.cqse.check.framework.scanner.ETokenType.LE;
053import static eu.cqse.check.framework.scanner.ETokenType.LIST;
054import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
055import static eu.cqse.check.framework.scanner.ETokenType.LT;
056import static eu.cqse.check.framework.scanner.ETokenType.NAME;
057import static eu.cqse.check.framework.scanner.ETokenType.NE;
058import static eu.cqse.check.framework.scanner.ETokenType.NODEBUG;
059import static eu.cqse.check.framework.scanner.ETokenType.NONE;
060import static eu.cqse.check.framework.scanner.ETokenType.NOT;
061import static eu.cqse.check.framework.scanner.ETokenType.OBJECT;
062import static eu.cqse.check.framework.scanner.ETokenType.OR;
063import static eu.cqse.check.framework.scanner.ETokenType.OVERRIDE;
064import static eu.cqse.check.framework.scanner.ETokenType.PACKAGE;
065import static eu.cqse.check.framework.scanner.ETokenType.PARENT;
066import static eu.cqse.check.framework.scanner.ETokenType.PREPROCESSOR_DIRECTIVE;
067import static eu.cqse.check.framework.scanner.ETokenType.PRIVATE;
068import static eu.cqse.check.framework.scanner.ETokenType.PUBLIC;
069import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
070import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
071import static eu.cqse.check.framework.scanner.ETokenType.REAL;
072import static eu.cqse.check.framework.scanner.ETokenType.REPEAT;
073import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
074import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
075import static eu.cqse.check.framework.scanner.ETokenType.SCRIPT;
076import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
077import static eu.cqse.check.framework.scanner.ETokenType.SET;
078import static eu.cqse.check.framework.scanner.ETokenType.STRING;
079import static eu.cqse.check.framework.scanner.ETokenType.SUPER;
080import static eu.cqse.check.framework.scanner.ETokenType.SWITCH;
081import static eu.cqse.check.framework.scanner.ETokenType.THEN;
082import static eu.cqse.check.framework.scanner.ETokenType.THIS;
083import static eu.cqse.check.framework.scanner.ETokenType.TO;
084import static eu.cqse.check.framework.scanner.ETokenType.UNDEFINED;
085import static eu.cqse.check.framework.scanner.ETokenType.UNTIL;
086import static eu.cqse.check.framework.scanner.ETokenType.USING;
087import static eu.cqse.check.framework.scanner.ETokenType.VOID;
088import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
089import static eu.cqse.check.framework.shallowparser.SubTypeNames.ASSIGNMENT;
090import static eu.cqse.check.framework.shallowparser.SubTypeNames.GLOBAL_VARIABLE;
091import static eu.cqse.check.framework.shallowparser.SubTypeNames.LOCAL_VARIABLE;
092import static eu.cqse.check.framework.shallowparser.SubTypeNames.SIMPLE_STATEMENT;
093import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.ATTRIBUTE;
094import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.META;
095import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.METHOD;
096import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.MODULE;
097import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.STATEMENT;
098import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.TYPE;
099import static eu.cqse.check.framework.shallowparser.languages.oscript.EOScriptParserState.IN_FUNCTION;
100import static eu.cqse.check.framework.shallowparser.languages.oscript.EOScriptParserState.IN_OBJECT;
101import static eu.cqse.check.framework.shallowparser.languages.oscript.EOScriptParserState.IN_SCRIPT;
102import static eu.cqse.check.framework.shallowparser.languages.oscript.EOScriptParserState.TOP_LEVEL;
103
104import java.util.EnumSet;
105
106import org.conqat.lib.commons.region.Region;
107
108import eu.cqse.check.framework.scanner.ETokenType;
109import eu.cqse.check.framework.shallowparser.SubTypeNames;
110import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
111import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
112import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
113
114/**
115 * Shallow Parser for the OpenText OScript language
116 */
117public class OScriptShallowParser extends ShallowParserBase<EOScriptParserState> {
118
119        /** Available types for variables */
120        private static final EnumSet<ETokenType> VARIABLE_TYPES = EnumSet.of(BOOLEAN, INTEGER, REAL, STRING, OBJECT, ASSOC,
121                        LIST, DATE, DYNAMIC, IDENTIFIER);
122
123        /** Tokens indicating end of stataments */
124        private static final EnumSet<ETokenType> STATEMENT_END_TOKENS = EnumSet.of(EOL, SEMICOLON);
125
126        /** Set of all OScript built-in keyword identifiers */
127        private static final EnumSet<ETokenType> OSCRIPT_IDENTIFIERS = EnumSet.of(IDENTIFIER, ADDFEATURE, AND, ASSOC,
128                        BOOLEAN, BREAK, BREAKIF, BY, CASE, CONTINUE, CONTINUEIF, DATE, DEFAULT, DEFINE, DO, DOWNTO, DYNAMIC, ELSE,
129                        ELSEIF, END, ENDIF, ENDSCRIPT, FINAL, FOR, FUNCTION, GE, GOTO, GT, IF, IFDEF, IFNDEF, IN, INHERITS, INTEGER,
130                        INTERFACE, LE, LIST, LT, NAME, NE, NODEBUG, NONE, NOT, OBJECT, OR, OVERRIDE, PACKAGE, PARENT, PRIVATE,
131                        PUBLIC, REAL, REPEAT, RETURN, SCRIPT, SET, STRING, SUPER, SWITCH, THEN, THIS, TO, UNDEFINED, UNTIL, USING,
132                        WHILE);
133
134        /** Possible modifier for function or attributes */
135        private static final EnumSet<ETokenType> MODIFIER = EnumSet.of(OVERRIDE, PUBLIC, PRIVATE);
136
137        /** Constructor. */
138        public OScriptShallowParser() {
139                super(EOScriptParserState.class, EOScriptParserState.TOP_LEVEL);
140                createTopLevelRules();
141                createVariableRules();
142                createFunctionRules();
143                createScriptRules();
144                createRulesForStatements();
145        }
146
147        /**
148         * Rules for every variable declaration and usage.
149         */
150        private void createVariableRules() {
151                // Global variables
152                inState(TOP_LEVEL).subRecognizer(getTypeRecognizer()).subRecognizer(getVariableRecognizer())
153                                .createNode(ATTRIBUTE, GLOBAL_VARIABLE, 0).endNode();
154
155                // Attributes
156                inState(IN_OBJECT, IN_SCRIPT).optional(MODIFIER).optionalSubRecognizer(getTypeRecognizer())
157                                .subRecognizer(getVariableRecognizer()).createNode(ATTRIBUTE, SubTypeNames.ATTRIBUTE, 0).endNode();
158
159                // Local variables
160                inState(IN_FUNCTION).subRecognizer(getTypeRecognizer()).subRecognizer(getVariableRecognizer())
161                                .createNode(STATEMENT, LOCAL_VARIABLE, 0).endNode();
162                inState(IN_FUNCTION).subRecognizer(getVariableNameRecognizer()).subRecognizer(getDeclarationRecognizer())
163                                .createNode(STATEMENT, LOCAL_VARIABLE, 0).endNode();
164
165                // Assignments
166                inAnyState().subRecognizer(getVariableNameRecognizer()).subRecognizer(getAssignmentRecognizer())
167                                .createNode(STATEMENT, ASSIGNMENT, 0).endNode();
168        }
169
170        /**
171         * Recognizer for the usage of a variable. It checks if it is a valid variable
172         * name and end.
173         */
174        private RecognizerBase<EOScriptParserState> getVariableRecognizer() {
175                return createRecognizer(
176                                start -> start.subRecognizer(getVariableNameRecognizer()).subRecognizer(getStatementEndRecognizer()));
177        }
178
179        /**
180         * Recognizer for the name or way a variable is adressed. This also includes
181         * stuff like: test.test().test
182         */
183        private RecognizerBase<EOScriptParserState> getVariableNameRecognizer() {
184                return createRecognizer(start -> {
185                        RecognizerBase<EOScriptParserState> prefix = start.optional(EnumSet.of(DOLLAR, DOT)).markStart()
186                                        .optionalSubRecognizer(getMethodCallRecognizer(false)).optional(DOT).repeated(IDENTIFIER, DOT);
187
188                        // Munich RE's scripts has some weird object property assignment
189                        // <code>a.(b)=c<code>
190                        prefix.repeated(IDENTIFIER, DOT).optional(IDENTIFIER, DOT).sequence(LPAREN).skipToWithNesting(RPAREN,
191                                        LPAREN, RPAREN);
192                        prefix.sequence(IDENTIFIER);
193                });
194        }
195
196        /** Recognizer for a variable declaration */
197        private RecognizerBase<EOScriptParserState> getDeclarationRecognizer() {
198                return createRecognizer(start -> {
199                        start.sequence(COMMA);
200                        start.sequence(STATEMENT_END_TOKENS);
201                });
202        }
203
204        /** Recognizer for an assignment starting with an '='. */
205        private RecognizerBase<EOScriptParserState> getAssignmentRecognizer() {
206                return createRecognizer(start -> {
207                        RecognizerBase<EOScriptParserState> prefix = start.sequence(EQUAL);
208                        prefix.subRecognizer(getMethodCallRecognizer(false)).repeated(DOT, IDENTIFIER);
209                        prefix.sequence(LBRACE).skipToWithNesting(RBRACE, LBRACE, RBRACE);
210                        prefix.sequence(LBRACK).skipToWithNesting(RBRACK, LBRACK, RBRACK);
211                        prefix.skipTo(STATEMENT_END_TOKENS);
212                });
213        }
214
215        /**
216         * Returns a recognizer which identifies if it is a valid statement end. This
217         * recognizer starts after the variable identifier.
218         * 
219         * String a -->| = 3 (so here it would recognize every valid part which can come
220         * after "String a"
221         */
222        private RecognizerBase<EOScriptParserState> getStatementEndRecognizer() {
223                return createRecognizer(start -> {
224                        start.subRecognizer(getDeclarationRecognizer());
225                        start.subRecognizer(getAssignmentRecognizer());
226                });
227        }
228
229        /**
230         * A recognizer for the declaration of a variable with possible modifier and
231         * type. Stops before the identifier.
232         */
233        private RecognizerBase<EOScriptParserState> getTypeRecognizer() {
234                return createRecognizer(start -> start.optional(MODIFIER).sequence(VARIABLE_TYPES).sequenceBefore(IDENTIFIER));
235        }
236
237        /**
238         * Recognizer for a method call. This covers also multiple chained method calls
239         * and calls over multiple lines.
240         *
241         * For example: test.test().test(\n, a,\n b)
242         */
243        private RecognizerBase<EOScriptParserState> getMethodCallRecognizer(boolean markStart) {
244                return createRecognizer(start -> {
245                        if (markStart) {
246                                start.optional(EnumSet.of(DOLLAR, DOT)).markStart().subRecognizer(getSimpleMethodCallRecognizer(), 1,
247                                                Integer.MAX_VALUE);
248                        } else {
249                                start.optional(EnumSet.of(DOLLAR, DOT)).subRecognizer(getSimpleMethodCallRecognizer(), 1,
250                                                Integer.MAX_VALUE);
251                        }
252                });
253        }
254
255        /**
256         * Recognizer for a simple method call like .test(), $test() or test()
257         */
258        private RecognizerBase<EOScriptParserState> getSimpleMethodCallRecognizer() {
259                return createRecognizer(start -> start.optional(DOT).repeated(IDENTIFIER, DOT).sequence(IDENTIFIER, LPAREN)
260                                .skipToWithNesting(RPAREN, LPAREN, RPAREN));
261        }
262
263        /**
264         * Rules for top-level declarations: package and object statements. These are
265         * according to the new source format.
266         */
267        private void createTopLevelRules() {
268                // Package
269                inState(TOP_LEVEL).sequence(PACKAGE).skipTo(STATEMENT_END_TOKENS)
270                                .createNode(MODULE, SubTypeNames.PACKAGE, new Region(1, -2)).endNode();
271
272                // Object
273                inState(TOP_LEVEL).sequence(PUBLIC, OBJECT, IDENTIFIER).markStart().createNode(TYPE, SubTypeNames.OBJECT, -1)
274                                .optional(INHERITS, IDENTIFIER).repeated(DOUBLE_COLON, IDENTIFIER).parseUntil(IN_OBJECT).sequence(END)
275                                .endNode();
276
277                inState(TOP_LEVEL).sequence(EnumSet.of(NAME, PARENT, ADDFEATURE)).skipTo(STATEMENT_END_TOKENS)
278                                .createNode(META, 0).endNode();
279
280                // Preprocessor directives
281                inAnyState().sequence(PREPROCESSOR_DIRECTIVE).createNode(EShallowEntityType.META, 0).endNode();
282        }
283
284        /** Rule for a script */
285        private void createScriptRules() {
286                inState(TOP_LEVEL, IN_OBJECT).optional(EnumSet.of(OVERRIDE, PUBLIC, PRIVATE))
287                                .sequence(SCRIPT, OSCRIPT_IDENTIFIERS).createNode(TYPE, SubTypeNames.SCRIPT, -1).parseUntil(IN_SCRIPT)
288                                .sequence(ENDSCRIPT).endNode();
289        }
290
291        /** Rule for a function */
292        private void createFunctionRules() {
293                inState(TOP_LEVEL, IN_OBJECT, IN_SCRIPT).optional(MODIFIER)
294                                .sequence(FUNCTION,
295                                                EnumSet.of(BOOLEAN, INTEGER, REAL, STRING, OBJECT, ASSOC, LIST, DATE, DYNAMIC, VOID),
296                                                OSCRIPT_IDENTIFIERS, LPAREN)
297                                .createNode(METHOD, SubTypeNames.FUNCTION, 2).skipToWithNesting(RPAREN, LPAREN, RPAREN)
298                                .parseUntil(IN_FUNCTION).sequence(END).endNode();
299        }
300
301        /** Rules for function and script statements */
302        private void createRulesForStatements() {
303                createRulesForConditionStatements();
304                createRulesForLoopStatements();
305
306                // Labels and goto jumps
307                inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(IDENTIFIER, COLON).createNode(STATEMENT, SubTypeNames.LABEL)
308                                .optional(STATEMENT_END_TOKENS).endNode();
309                inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(GOTO, IDENTIFIER).createNode(STATEMENT, -1)
310                                .optional(STATEMENT_END_TOKENS).endNode();
311
312                // Simple keywords
313                inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(EnumSet.of(RETURN, BREAK, CONTINUE, BREAKIF, CONTINUEIF))
314                                .createNode(STATEMENT, 0).skipNested(LPAREN, RPAREN).skipTo(STATEMENT_END_TOKENS).endNode();
315
316                // Label
317                inAnyState().sequence(IDENTIFIER, COLON).createNode(STATEMENT, SubTypeNames.LABEL, 0)
318                                .skipTo(STATEMENT_END_TOKENS).endNode();
319
320                // Method calls
321                inAnyState().subRecognizer(getMethodCallRecognizer(true)).createNode(STATEMENT, SIMPLE_STATEMENT, 0).endNode();
322        }
323
324        /** Create rules for condition statements */
325        private void createRulesForConditionStatements() {
326                // If Statement
327                createIfRules(IF);
328                createIfRules(ELSE);
329                createIfRules(ELSEIF);
330
331                // Switch
332                inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(SWITCH).createNode(STATEMENT, 0)
333                                .skipToWithNesting(STATEMENT_END_TOKENS, LPAREN, RPAREN).parseUntil(IN_FUNCTION).sequence(END)
334                                .endNode();
335                inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(CASE)
336                                .skipToWithNesting(STATEMENT_END_TOKENS, LPAREN, RPAREN)
337                                .createNode(EShallowEntityType.META, new int[] { 0, 1 }).parseUntil(IN_FUNCTION).sequence(END)
338                                .endNode();
339                inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(DEFAULT, STATEMENT_END_TOKENS)
340                                .createNode(EShallowEntityType.META, 0).parseUntil(IN_FUNCTION).sequence(END).endNode();
341        }
342
343        /** Create rules for If/else-if conditions */
344        private void createIfRules(ETokenType conditionToken) {
345                RecognizerBase<EOScriptParserState> ifRecognizer = inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT)
346                                .sequence(conditionToken).skipToWithNesting(STATEMENT_END_TOKENS, LPAREN, RPAREN)
347                                .createNode(STATEMENT, 0).parseUntil(IN_FUNCTION);
348                if (conditionToken == ELSE) {
349                        ifRecognizer.sequence(END).endNode();
350                        return;
351                }
352
353                ifRecognizer.sequenceBefore(EnumSet.of(ELSE, ELSEIF)).endNodeWithContinuation();
354                ifRecognizer.sequence(END).endNode();
355        }
356
357        /** Create rules for loop statements */
358        private void createRulesForLoopStatements() {
359                // While
360                inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(WHILE).createNode(STATEMENT, 0).skipNested(LPAREN, RPAREN)
361                                .parseUntil(IN_FUNCTION).sequence(END).endNode();
362
363                // Repeat
364                inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(REPEAT).createNode(STATEMENT, 0).parseUntil(IN_FUNCTION)
365                                .sequence(WHILE).skipNested(LPAREN, RPAREN).endNode();
366
367                // For...each, C-style and structured for loops
368                RecognizerBase<EOScriptParserState> forRecognizer = inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(FOR)
369                                .markStart();
370                forRecognizer.sequence(IDENTIFIER).createNode(STATEMENT, -2).skipNested(LPAREN, RPAREN)
371                                .skipTo(STATEMENT_END_TOKENS).parseUntil(IN_FUNCTION).sequence(END).endNode();
372                forRecognizer.skipNested(LPAREN, RPAREN).createNode(STATEMENT, -3).parseUntil(IN_FUNCTION).sequence(END)
373                                .endNode();
374        }
375
376}