001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.matlab;
018
019import static eu.cqse.check.framework.scanner.ETokenType.AND;
020import static eu.cqse.check.framework.scanner.ETokenType.BREAK;
021import static eu.cqse.check.framework.scanner.ETokenType.CASE;
022import static eu.cqse.check.framework.scanner.ETokenType.CATCH;
023import static eu.cqse.check.framework.scanner.ETokenType.CLASSDEF;
024import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
025import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE;
026import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
027import static eu.cqse.check.framework.scanner.ETokenType.ELSEIF;
028import static eu.cqse.check.framework.scanner.ETokenType.END;
029import static eu.cqse.check.framework.scanner.ETokenType.ENUMERATION;
030import static eu.cqse.check.framework.scanner.ETokenType.EOL;
031import static eu.cqse.check.framework.scanner.ETokenType.EQ;
032import static eu.cqse.check.framework.scanner.ETokenType.EVENTS;
033import static eu.cqse.check.framework.scanner.ETokenType.EXCLAMATION;
034import static eu.cqse.check.framework.scanner.ETokenType.FOR;
035import static eu.cqse.check.framework.scanner.ETokenType.FUNCTION;
036import static eu.cqse.check.framework.scanner.ETokenType.GLOBAL;
037import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
038import static eu.cqse.check.framework.scanner.ETokenType.IF;
039import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
040import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
041import static eu.cqse.check.framework.scanner.ETokenType.LT;
042import static eu.cqse.check.framework.scanner.ETokenType.METHODS;
043import static eu.cqse.check.framework.scanner.ETokenType.OTHERWISE;
044import static eu.cqse.check.framework.scanner.ETokenType.PARFOR;
045import static eu.cqse.check.framework.scanner.ETokenType.PERSISTENT;
046import static eu.cqse.check.framework.scanner.ETokenType.PROPERTIES;
047import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
048import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
049import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
050import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
051import static eu.cqse.check.framework.scanner.ETokenType.SWITCH;
052import static eu.cqse.check.framework.scanner.ETokenType.TRY;
053import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
054import static eu.cqse.check.framework.shallowparser.languages.matlab.MatlabShallowParser.EMatlabParserStates.IN_CLASSDEF;
055import static eu.cqse.check.framework.shallowparser.languages.matlab.MatlabShallowParser.EMatlabParserStates.IN_ENUMERATION;
056import static eu.cqse.check.framework.shallowparser.languages.matlab.MatlabShallowParser.EMatlabParserStates.IN_EVENTS;
057import static eu.cqse.check.framework.shallowparser.languages.matlab.MatlabShallowParser.EMatlabParserStates.IN_METHOD;
058import static eu.cqse.check.framework.shallowparser.languages.matlab.MatlabShallowParser.EMatlabParserStates.IN_METHODS;
059import static eu.cqse.check.framework.shallowparser.languages.matlab.MatlabShallowParser.EMatlabParserStates.IN_PROPERTIES;
060import static eu.cqse.check.framework.shallowparser.languages.matlab.MatlabShallowParser.EMatlabParserStates.IN_SWITCH;
061import static eu.cqse.check.framework.shallowparser.languages.matlab.MatlabShallowParser.EMatlabParserStates.TOP_LEVEL;
062
063import java.util.EnumSet;
064
065import eu.cqse.check.framework.scanner.ETokenType;
066import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
067import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
068import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
069import eu.cqse.check.framework.shallowparser.languages.matlab.MatlabShallowParser.EMatlabParserStates;
070
071/**
072 * A shallow parser for Matlab.
073 * 
074 * This parser currently recognizes class definitions (with events, enumeration,
075 * methods and properties). Within functions, control structures and simple
076 * statements can be parsed. Nested methods and shell escapes are supported.
077 */
078public class MatlabShallowParser extends ShallowParserBase<EMatlabParserStates> {
079
080        /** All possible states of a MatlabShallowParser. */
081        public static enum EMatlabParserStates {
082                /** Top-level state. */
083                TOP_LEVEL,
084
085                /** Inside a class definition. */
086                IN_CLASSDEF,
087
088                /** Inside a properties section. */
089                IN_PROPERTIES,
090
091                /** Inside a events section. */
092                IN_EVENTS,
093
094                /** Inside a enumeration section. */
095                IN_ENUMERATION,
096
097                /** Inside a methods section. */
098                IN_METHODS,
099
100                /** Inside a method's body. */
101                IN_METHOD,
102
103                /** Inside a switch block. */
104                IN_SWITCH
105        }
106
107        /** All tokens that are valid statement separators. */
108        private static final EnumSet<ETokenType> STATEMENT_SEPARATORS = EnumSet.of(EOL, SEMICOLON, COMMA);
109
110        /** All states in which statements can be parsed. */
111        private static final EMatlabParserStates[] STATEMENT_STATES = { TOP_LEVEL, IN_METHOD };
112
113        /** Constructor. */
114        public MatlabShallowParser() {
115                super(EMatlabParserStates.class, TOP_LEVEL);
116                createTopLevelRules();
117                createInClassDefRules();
118                createInPropertiesRules();
119                createInEventsRules();
120                createInEnumerationRules();
121                createInMethodsRules();
122                createStatementRules();
123        }
124
125        /** Creates rules for the top-level state. */
126        private void createTopLevelRules() {
127                // class definition
128                inState(TOP_LEVEL).sequence(CLASSDEF).skipToWithNesting(IDENTIFIER, LPAREN, RPAREN)
129                                .createNode(EShallowEntityType.TYPE, "class", -1).optional(LT, IDENTIFIER).repeated(AND, IDENTIFIER)
130                                .sequence(EOL).parseUntil(IN_CLASSDEF).sequence(END).endNode();
131        }
132
133        /** Creates rules for parsing class definitions. */
134        private void createInClassDefRules() {
135                // properties section
136                createInClassDefSectionRule(PROPERTIES, IN_PROPERTIES);
137
138                // methods section
139                createInClassDefSectionRule(METHODS, IN_METHODS);
140
141                // events section
142                createInClassDefSectionRule(EVENTS, IN_EVENTS);
143
144                // enumeration section
145                inState(IN_CLASSDEF).sequence(ENUMERATION, EOL).createNode(EShallowEntityType.META, 0)
146                                .parseUntil(IN_ENUMERATION).sequence(END).endNode();
147        }
148
149        /**
150         * Creates a rule for parsing a section within a class definition. The
151         * section must start with the given start token. The section's content is
152         * parsed in the given sub-state.
153         */
154        private void createInClassDefSectionRule(ETokenType startTokenType, EMatlabParserStates subState) {
155                inState(IN_CLASSDEF).sequence(startTokenType).skipToWithNesting(EOL, LPAREN, RPAREN)
156                                .createNode(EShallowEntityType.META, 0).parseUntil(subState).sequence(END).endNode();
157        }
158
159        /** Creates rules for parsing class properties. */
160        private void createInPropertiesRules() {
161                inState(IN_PROPERTIES).sequence(IDENTIFIER).createNode(EShallowEntityType.ATTRIBUTE, "attribute", 0)
162                                .skipTo(STATEMENT_SEPARATORS).endNode();
163        }
164
165        /** Creates rules for parsing class events. */
166        private void createInEventsRules() {
167                inState(IN_EVENTS).sequence(IDENTIFIER).createNode(EShallowEntityType.ATTRIBUTE, 0).sequence(EOL).endNode();
168        }
169
170        /** Creates rules for parsing enumerations. */
171        private void createInEnumerationRules() {
172                inState(IN_ENUMERATION).sequence(IDENTIFIER).createNode(EShallowEntityType.ATTRIBUTE, "enum-literal", 0)
173                                .skipToWithNesting(EnumSet.of(COMMA, EOL), LPAREN, RPAREN).endNode();
174        }
175
176        /** Creates rules for parsing function definitions/declarations. */
177        private void createInMethodsRules() {
178                // recognizer for the beginning of function definitions
179                // function definitions can also appear in top-level state or within
180                // another function's body
181                RecognizerBase<EMatlabParserStates> functionAlternative = inState(TOP_LEVEL, IN_METHOD, IN_METHODS)
182                                .sequence(FUNCTION);
183
184                // function with one return value
185                continueMethodHeadRule(functionAlternative.sequence(IDENTIFIER, EQ), true);
186
187                // function with no return value
188                continueMethodHeadRule(functionAlternative, true);
189
190                // function with multiple return values
191                continueMethodHeadRule(functionAlternative.skipToWithNesting(EQ, LBRACK, RBRACK), true);
192
193                // function declaration
194                continueMethodHeadRule(inState(IN_METHODS), false);
195        }
196
197        /** Appends rules for parsing a method's head to the given recognizer. */
198        private static void continueMethodHeadRule(RecognizerBase<EMatlabParserStates> functionRecognizer,
199                        boolean isDefinition) {
200                String subtype = "function declaration";
201                if (isDefinition) {
202                        subtype = "function";
203                }
204
205                functionRecognizer = functionRecognizer.sequence(IDENTIFIER).createNode(EShallowEntityType.METHOD, subtype, -1)
206                                .skipToWithNesting(EOL, LPAREN, RPAREN);
207                // if it is a function definition, we have to parse the function's body,
208                // as well
209                if (isDefinition) {
210                        functionRecognizer = functionRecognizer.parseUntilOrEof(IN_METHOD).sequence(END);
211                }
212                functionRecognizer.endNode();
213        }
214
215        /**
216         * Create rules for parsing all kinds of statements within function bodies.
217         */
218        private void createStatementRules() {
219                // create rule for if/elseif/else
220                createBlockRulesWithContinuation(EnumSet.of(IF, ELSEIF, ELSE), EnumSet.of(ELSEIF, ELSE));
221
222                // create rule try/catch
223                createBlockRulesWithContinuation(EnumSet.of(TRY, CATCH), EnumSet.of(CATCH));
224
225                createLoopRules();
226                createSwitchCaseRules();
227                createShellEscapeRules();
228                createSimpleStatementRule();
229        }
230
231        /**
232         * Creates a rules for parsing statements that begin with one of the given
233         * start tokens, followed by a block. The statement can be continued if one
234         * of the given continuation tokens is encountered after the block.
235         */
236        private void createBlockRulesWithContinuation(EnumSet<ETokenType> startTokens,
237                        EnumSet<ETokenType> continuationTokens) {
238                RecognizerBase<EMatlabParserStates> alternative = inState(STATEMENT_STATES).sequence(startTokens)
239                                .createNode(EShallowEntityType.STATEMENT, 0).skipTo(STATEMENT_SEPARATORS).parseUntil(IN_METHOD);
240
241                alternative.sequence(END).endNode();
242
243                alternative.sequenceBefore(continuationTokens).endNodeWithContinuation();
244        }
245
246        /** Creates rules for parsing for/parfor and while loops. */
247        private void createLoopRules() {
248                // create rule for for/parfor/while loop
249                inState(STATEMENT_STATES).sequence(EnumSet.of(FOR, PARFOR, WHILE)).createNode(EShallowEntityType.STATEMENT, 0)
250                                .skipTo(STATEMENT_SEPARATORS).parseUntil(IN_METHOD).sequence(END).endNode();
251        }
252
253        /** Creates rules for parsing switch/case structures. */
254        private void createSwitchCaseRules() {
255                inState(STATEMENT_STATES).sequence(SWITCH).createNode(EShallowEntityType.STATEMENT, 0).skipTo(EOL)
256                                .parseUntil(IN_SWITCH).sequence(END).endNode();
257
258                inState(IN_SWITCH).sequence(EnumSet.of(CASE, OTHERWISE)).createNode(EShallowEntityType.META, 0).skipTo(EOL)
259                                .parseUntil(IN_METHOD).sequenceBefore(EnumSet.of(CASE, OTHERWISE, END)).endNode();
260        }
261
262        /** Returns a set of token types, that can start a simple statement. */
263        private static EnumSet<ETokenType> getSimpleStatementTokenTypes() {
264                return EnumSet.of(IDENTIFIER, BREAK, CONTINUE, RETURN, LBRACK, PERSISTENT, GLOBAL);
265        }
266
267        /** Creates rules for parsing shell escapes. */
268        private void createShellEscapeRules() {
269                inState(STATEMENT_STATES).sequence(EXCLAMATION).createNode(EShallowEntityType.STATEMENT, "shell-escape")
270                                .skipTo(EOL).endNode();
271        }
272
273        /** Creates rules for parsing simple statements. */
274        private void createSimpleStatementRule() {
275                inState(STATEMENT_STATES).sequence(getSimpleStatementTokenTypes())
276                                .createNode(EShallowEntityType.STATEMENT, "simple-statement", 0).skipTo(STATEMENT_SEPARATORS).endNode();
277        }
278}