001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.ocaml;
018
019import static eu.cqse.check.framework.scanner.ETokenType.BEGIN;
020import static eu.cqse.check.framework.scanner.ETokenType.CLASS;
021import static eu.cqse.check.framework.scanner.ETokenType.COLONGREATER;
022import static eu.cqse.check.framework.scanner.ETokenType.DO;
023import static eu.cqse.check.framework.scanner.ETokenType.DONE;
024import static eu.cqse.check.framework.scanner.ETokenType.DOT;
025import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
026import static eu.cqse.check.framework.scanner.ETokenType.END;
027import static eu.cqse.check.framework.scanner.ETokenType.EQ;
028import static eu.cqse.check.framework.scanner.ETokenType.EXCEPTION;
029import static eu.cqse.check.framework.scanner.ETokenType.EXCLAMATION;
030import static eu.cqse.check.framework.scanner.ETokenType.FALSE;
031import static eu.cqse.check.framework.scanner.ETokenType.FLOATING_POINT_LITERAL;
032import static eu.cqse.check.framework.scanner.ETokenType.FOR;
033import static eu.cqse.check.framework.scanner.ETokenType.FUN;
034import static eu.cqse.check.framework.scanner.ETokenType.FUNCTION;
035import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
036import static eu.cqse.check.framework.scanner.ETokenType.IF;
037import static eu.cqse.check.framework.scanner.ETokenType.IN;
038import static eu.cqse.check.framework.scanner.ETokenType.INHERIT;
039import static eu.cqse.check.framework.scanner.ETokenType.INTEGER_LITERAL;
040import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
041import static eu.cqse.check.framework.scanner.ETokenType.LET;
042import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
043import static eu.cqse.check.framework.scanner.ETokenType.MATCH;
044import static eu.cqse.check.framework.scanner.ETokenType.METHOD;
045import static eu.cqse.check.framework.scanner.ETokenType.MINUS;
046import static eu.cqse.check.framework.scanner.ETokenType.MINUSGREATER;
047import static eu.cqse.check.framework.scanner.ETokenType.MODULE;
048import static eu.cqse.check.framework.scanner.ETokenType.NEW;
049import static eu.cqse.check.framework.scanner.ETokenType.OBJECT;
050import static eu.cqse.check.framework.scanner.ETokenType.OPEN;
051import static eu.cqse.check.framework.scanner.ETokenType.PRIVATE;
052import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
053import static eu.cqse.check.framework.scanner.ETokenType.REC;
054import static eu.cqse.check.framework.scanner.ETokenType.REF;
055import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
056import static eu.cqse.check.framework.scanner.ETokenType.SHARP;
057import static eu.cqse.check.framework.scanner.ETokenType.STRUCT;
058import static eu.cqse.check.framework.scanner.ETokenType.THEN;
059import static eu.cqse.check.framework.scanner.ETokenType.TO;
060import static eu.cqse.check.framework.scanner.ETokenType.TRUE;
061import static eu.cqse.check.framework.scanner.ETokenType.TRY;
062import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
063import static eu.cqse.check.framework.scanner.ETokenType.WITH;
064import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_EXPRESSION;
065import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_TYPE;
066import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.TOP_LEVEL;
067
068import java.util.EnumSet;
069
070import org.conqat.lib.commons.region.Region;
071
072import eu.cqse.check.framework.scanner.ETokenType;
073import eu.cqse.check.framework.shallowparser.SubTypeNames;
074import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
075import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
076import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
077import eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates;
078
079/**
080 */
081
082/** A shallow parser for OCaml. */
083public class OcamlShallowParser extends ShallowParserBase<EGenericParserStates> {
084
085        /** Constructor. */
086        public OcamlShallowParser() {
087                super(EGenericParserStates.class, TOP_LEVEL);
088                createDefinitionRules();
089                createTypeRules();
090                createClassElementsRules();
091                createNestingExpressionRules();
092                createNonNestingExpressionRules();
093                createSimplifiedRecExpressionRules();
094                // Create an incomplete node if something is still unrecognized.
095                inAnyState().markStart().sequence(EnumSet.allOf(ETokenType.class)).createNode(EShallowEntityType.META, 0, null);
096        }
097
098        /** Definition rules. */
099        private void createDefinitionRules() {
100                inState(TOP_LEVEL).sequence(OPEN).createNode(EShallowEntityType.META, "open", null).sequence(IDENTIFIER)
101                                .repeated(DOT, IDENTIFIER).endNode();
102                inState(TOP_LEVEL).sequence(MODULE).markStart().sequence(IDENTIFIER).skipTo(EQ).sequence(STRUCT)
103                                .createNode(EShallowEntityType.META, "module-def", 0).skipTo(END).endNode();
104                inState(TOP_LEVEL).sequence(MODULE).markStart().sequence(IDENTIFIER).skipTo(EQ).sequence(IDENTIFIER)
105                                .createNode(EShallowEntityType.META, "module-def", 0).repeated(DOT, IDENTIFIER).endNode();
106                inState(TOP_LEVEL).sequence(EXCEPTION, IDENTIFIER);
107                inState(TOP_LEVEL).sequence(LET).optional(REC).markStart().sequence(IDENTIFIER)
108                                .createNode(EShallowEntityType.METHOD, "let-decl", 0).skipTo(EQ).parseOnce(IN_EXPRESSION).endNode();
109        }
110
111        /** Class definition rule */
112        private void createTypeRules() {
113                inState(TOP_LEVEL).sequence(CLASS).markStart().createNode(EShallowEntityType.TYPE, SubTypeNames.CLASS, 0)
114                                .sequence(IDENTIFIER).skipTo(EQ).sequence(OBJECT).optional(LPAREN, IDENTIFIER, RPAREN)
115                                .parseUntil(IN_TYPE).sequence(END).endNode();
116        }
117
118        /** Create rules for class elements, i.e., methods. */
119        private void createClassElementsRules() {
120                inState(IN_TYPE).sequence(METHOD).optional(PRIVATE).markStart().sequence(IDENTIFIER) // method
121                                                                                                                                                                                                // name
122                                .createNode(EShallowEntityType.METHOD, SubTypeNames.METHOD, 0).skipTo(EQ).parseOnce(IN_EXPRESSION) // method
123                                                                                                                                                                                                                                        // body
124                                .endNode();
125                inState(IN_TYPE).sequence(INHERIT, IDENTIFIER).repeated(DOT, IDENTIFIER);
126        }
127
128        /** Rules to handle nesting expressions, i.e., if, while, for, match, .. */
129        private void createNestingExpressionRules() {
130
131                inState(IN_EXPRESSION).sequence(IF) // 4a: if with else branch
132                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.IF).parseOnce(IN_EXPRESSION).sequence(THEN)
133                                .parseOnce(IN_EXPRESSION).sequence(ELSE).parseOnce(IN_EXPRESSION).endNode();
134
135                inState(IN_EXPRESSION).sequence(IF) // 4b: if without else branch
136                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.IF).parseOnce(IN_EXPRESSION).sequence(THEN)
137                                .parseOnce(IN_EXPRESSION).endNode();
138
139                inState(IN_EXPRESSION).sequence(WHILE) // 4c
140                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.WHILE).parseOnce(IN_EXPRESSION).sequence(DO)
141                                .parseOnce(IN_EXPRESSION).sequence(DONE).endNode();
142
143                inState(IN_EXPRESSION).sequence(FOR, IDENTIFIER, EQ) // 4d
144                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.FOR).parseOnce(IN_EXPRESSION).sequence(TO)
145                                .parseOnce(IN_EXPRESSION).sequence(DO).parseOnce(IN_EXPRESSION).sequence(DONE).endNode();
146
147                // rule 4e: match expression
148                RecognizerBase<EGenericParserStates> recognizedMatch = inState(IN_EXPRESSION).sequence(MATCH)
149                                .createNode(EShallowEntityType.STATEMENT, "match-expr").parseOnce(IN_EXPRESSION).sequence(WITH);
150                patternMatchingRecognizer(recognizedMatch).endNode();
151
152                RecognizerBase<EGenericParserStates> recognizedFunction = // 4f
153                                inState(IN_EXPRESSION).sequence(FUNCTION).createNode(EShallowEntityType.STATEMENT, "function-expr");
154                patternMatchingRecognizer(recognizedFunction).endNode();
155
156                inState(IN_EXPRESSION).sequence(FUN) // 4g
157                                .createNode(EShallowEntityType.STATEMENT, "fun-expr").skipTo(MINUSGREATER).parseOnce(IN_EXPRESSION)
158                                .endNode();
159
160                patternMatchingRecognizer(inState(IN_EXPRESSION).sequence(TRY) // 4h
161                                .parseOnce(IN_EXPRESSION).sequence(WITH)).endNode();
162
163                inState(IN_EXPRESSION).sequence(OBJECT) // rule 4i
164                                .createNode(EShallowEntityType.STATEMENT, "object-expr").parseUntil(IN_TYPE).sequence(END).endNode();
165        }
166
167        /** Rules to handle non-nesting expressions */
168        private void createNonNestingExpressionRules() {
169
170                inState(IN_EXPRESSION).sequence(BEGIN).parseOnce(IN_EXPRESSION) // 4v
171                                .sequence(END);
172
173                inState(IN_EXPRESSION).sequence(LBRACK) // rule 4n: list expressions
174                                .createNode(EShallowEntityType.STATEMENT, "list-expr").skipToWithNesting(RBRACK, LBRACK, RBRACK)
175                                .endNode();
176
177                inState(IN_EXPRESSION).sequence(LET).optional(REC).markStart() // 4q
178                                .sequence(IDENTIFIER).createNode(EShallowEntityType.STATEMENT, "let-expr", 0).skipTo(EQ)
179                                .parseOnce(IN_EXPRESSION).sequence(IN).parseOnce(IN_EXPRESSION).endNode();
180
181                inState(IN_EXPRESSION).sequence(NEW).markStart().sequence(IDENTIFIER).repeated(DOT, IDENTIFIER)
182                                .createNode(EShallowEntityType.STATEMENT, "new-expr", 0).endNode();
183
184        }
185
186        /**
187         * Simplified expression rules. (TODO: general rules would be left-recursive.)
188         */
189        private void createSimplifiedRecExpressionRules() {
190
191                inState(IN_EXPRESSION).sequence(IDENTIFIER) // 4s
192                                .sequence(SHARP, IDENTIFIER).createNode(EShallowEntityType.STATEMENT, "method-call-expr", 0)
193                                .sequence(IDENTIFIER).endNode();
194
195                inState(IN_EXPRESSION).sequence(getUnaryConstructors()) // rule 4u
196                                .parseOnce(IN_EXPRESSION);
197
198                inState(IN_EXPRESSION).sequence(IDENTIFIER) // rule 4j
199                                .repeated(DOT, IDENTIFIER).createNode(EShallowEntityType.STATEMENT, "id-expr", new Region(0, -1))
200                                .endNode();
201
202                inState(IN_EXPRESSION).sequence(getConstantTokens()) // rule 4k
203                                .createNode(EShallowEntityType.STATEMENT, "const-expr", 0).endNode();
204
205                inState(IN_EXPRESSION).markStart().sequence(LPAREN, RPAREN) // rule 4k
206                                .createNode(EShallowEntityType.STATEMENT, "const-expr", new Region(0, -1)).endNode();
207
208                inState(IN_EXPRESSION).sequence(LPAREN).parseOnce(IN_EXPRESSION) // 4l
209                                .sequence(RPAREN);
210
211                inState(IN_EXPRESSION).sequence(LPAREN).parseOnce(IN_EXPRESSION) // 4t
212                                .sequence(COLONGREATER, IDENTIFIER).repeated(DOT, IDENTIFIER).sequence(RPAREN);
213
214        }
215
216        /** Recognizes a pattern-matching construct */
217        private static RecognizerBase<EGenericParserStates> patternMatchingRecognizer(
218                        RecognizerBase<EGenericParserStates> currentState) {
219                return currentState.skipTo(MINUSGREATER).parseOnce(IN_EXPRESSION);
220        }
221
222        /**
223         * Returns a set of tokens representing constant values, except () and [].
224         */
225        private static EnumSet<ETokenType> getConstantTokens() {
226                return EnumSet.of(INTEGER_LITERAL, FLOATING_POINT_LITERAL, IDENTIFIER, FALSE, TRUE);
227        }
228
229        /** Returns a set of unary constructors */
230        private static EnumSet<ETokenType> getUnaryConstructors() {
231                return EnumSet.of(REF, EXCLAMATION, MINUS);
232        }
233}