/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package eu.cqse.check.framework.shallowparser.languages.ocaml;

import static eu.cqse.check.framework.scanner.ETokenType.BEGIN;
import static eu.cqse.check.framework.scanner.ETokenType.CLASS;
import static eu.cqse.check.framework.scanner.ETokenType.COLONGREATER;
import static eu.cqse.check.framework.scanner.ETokenType.DO;
import static eu.cqse.check.framework.scanner.ETokenType.DONE;
import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
import static eu.cqse.check.framework.scanner.ETokenType.END;
import static eu.cqse.check.framework.scanner.ETokenType.EQ;
import static eu.cqse.check.framework.scanner.ETokenType.EXCEPTION;
import static eu.cqse.check.framework.scanner.ETokenType.EXCLAMATION;
import static eu.cqse.check.framework.scanner.ETokenType.FALSE;
import static eu.cqse.check.framework.scanner.ETokenType.FLOATING_POINT_LITERAL;
import static eu.cqse.check.framework.scanner.ETokenType.FOR;
import static eu.cqse.check.framework.scanner.ETokenType.FUN;
import static eu.cqse.check.framework.scanner.ETokenType.FUNCTION;
import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
import static eu.cqse.check.framework.scanner.ETokenType.IF;
import static eu.cqse.check.framework.scanner.ETokenType.IN;
import static eu.cqse.check.framework.scanner.ETokenType.INHERIT;
import static eu.cqse.check.framework.scanner.ETokenType.INTEGER_LITERAL;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LET;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.MATCH;
import static eu.cqse.check.framework.scanner.ETokenType.METHOD;
import static eu.cqse.check.framework.scanner.ETokenType.MINUS;
import static eu.cqse.check.framework.scanner.ETokenType.MINUSGREATER;
import static eu.cqse.check.framework.scanner.ETokenType.MODULE;
import static eu.cqse.check.framework.scanner.ETokenType.NEW;
import static eu.cqse.check.framework.scanner.ETokenType.OBJECT;
import static eu.cqse.check.framework.scanner.ETokenType.OPEN;
import static eu.cqse.check.framework.scanner.ETokenType.PRIVATE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.REC;
import static eu.cqse.check.framework.scanner.ETokenType.REF;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SHARP;
import static eu.cqse.check.framework.scanner.ETokenType.STRUCT;
import static eu.cqse.check.framework.scanner.ETokenType.THEN;
import static eu.cqse.check.framework.scanner.ETokenType.TO;
import static eu.cqse.check.framework.scanner.ETokenType.TRUE;
import static eu.cqse.check.framework.scanner.ETokenType.TRY;
import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
import static eu.cqse.check.framework.scanner.ETokenType.WITH;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_EXPRESSION;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_TYPE;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.TOP_LEVEL;

import java.util.EnumSet;

import org.conqat.lib.commons.region.Region;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
import eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates;

/**
 * A shallow parser for OCaml.
 * <p>
 * All rules are registered in the constructor, grouped into definitions,
 * class types, class members and three families of expression rules. The
 * registration order of the original implementation is kept unchanged.
 */
public class OcamlShallowParser extends ShallowParserBase<EGenericParserStates> {

    /** Constructor. Registers every rule group, then a catch-all rule. */
    public OcamlShallowParser() {
        super(EGenericParserStates.class, TOP_LEVEL);

        createDefinitionRules();
        createTypeRules();
        createClassElementsRules();
        createNestingExpressionRules();
        createNonNestingExpressionRules();
        createSimplifiedRecExpressionRules();

        // Catch-all: whatever is still unrecognized becomes an incomplete
        // META node (the rule creates a node but never ends it).
        EnumSet<ETokenType> anyToken = EnumSet.allOf(ETokenType.class);
        inAnyState()
                .markStart()
                .sequence(anyToken)
                .createNode(EShallowEntityType.META, 0, null);
    }

    /**
     * Registers the top-level definition rules: <code>open</code>, the two
     * <code>module</code> forms, <code>exception</code> and <code>let</code>.
     */
    private void createDefinitionRules() {
        // open A.B.C
        inState(TOP_LEVEL)
                .sequence(OPEN)
                .createNode(EShallowEntityType.META, "open", null)
                .sequence(IDENTIFIER)
                .repeated(DOT, IDENTIFIER)
                .endNode();

        // module M ... = struct ... end
        inState(TOP_LEVEL)
                .sequence(MODULE)
                .markStart()
                .sequence(IDENTIFIER)
                .skipTo(EQ)
                .sequence(STRUCT)
                .createNode(EShallowEntityType.META, "module-def", 0)
                .skipTo(END)
                .endNode();

        // module M ... = A.B.C
        inState(TOP_LEVEL)
                .sequence(MODULE)
                .markStart()
                .sequence(IDENTIFIER)
                .skipTo(EQ)
                .sequence(IDENTIFIER)
                .createNode(EShallowEntityType.META, "module-def", 0)
                .repeated(DOT, IDENTIFIER)
                .endNode();

        // exception E: matched, but this rule creates no node.
        inState(TOP_LEVEL)
                .sequence(EXCEPTION, IDENTIFIER);

        // let [rec] name ... = <expression>
        inState(TOP_LEVEL)
                .sequence(LET)
                .optional(REC)
                .markStart()
                .sequence(IDENTIFIER)
                .createNode(EShallowEntityType.METHOD, "let-decl", 0)
                .skipTo(EQ)
                .parseOnce(IN_EXPRESSION)
                .endNode();
    }

    /** Registers the rule for top-level class definitions. */
    private void createTypeRules() {
        // class name ... = object [(self)] <class elements> end
        inState(TOP_LEVEL)
                .sequence(CLASS)
                .markStart()
                .createNode(EShallowEntityType.TYPE, SubTypeNames.CLASS, 0)
                .sequence(IDENTIFIER)
                .skipTo(EQ)
                .sequence(OBJECT)
                .optional(LPAREN, IDENTIFIER, RPAREN)
                .parseUntil(IN_TYPE)
                .sequence(END)
                .endNode();
    }

    /** Registers the rules for class elements, i.e., methods and inherit clauses. */
    private void createClassElementsRules() {
        // method [private] name ... = <body expression>
        inState(IN_TYPE)
                .sequence(METHOD)
                .optional(PRIVATE)
                .markStart()
                .sequence(IDENTIFIER)
                .createNode(EShallowEntityType.METHOD, SubTypeNames.METHOD, 0)
                .skipTo(EQ)
                .parseOnce(IN_EXPRESSION)
                .endNode();

        // inherit A.B.C: matched, but this rule creates no node.
        inState(IN_TYPE)
                .sequence(INHERIT, IDENTIFIER)
                .repeated(DOT, IDENTIFIER);
    }

    /**
     * Registers the rules for expressions that contain further expressions,
     * i.e., if, while, for, match, function, fun, try and object.
     */
    private void createNestingExpressionRules() {
        // 4a: if with else branch. Registered before 4b.
        inState(IN_EXPRESSION)
                .sequence(IF)
                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.IF)
                .parseOnce(IN_EXPRESSION)
                .sequence(THEN)
                .parseOnce(IN_EXPRESSION)
                .sequence(ELSE)
                .parseOnce(IN_EXPRESSION)
                .endNode();

        // 4b: if without else branch.
        inState(IN_EXPRESSION)
                .sequence(IF)
                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.IF)
                .parseOnce(IN_EXPRESSION)
                .sequence(THEN)
                .parseOnce(IN_EXPRESSION)
                .endNode();

        // 4c: while <expr> do <expr> done
        inState(IN_EXPRESSION)
                .sequence(WHILE)
                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.WHILE)
                .parseOnce(IN_EXPRESSION)
                .sequence(DO)
                .parseOnce(IN_EXPRESSION)
                .sequence(DONE)
                .endNode();

        // 4d: for i = <expr> to <expr> do <expr> done
        inState(IN_EXPRESSION)
                .sequence(FOR, IDENTIFIER, EQ)
                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.FOR)
                .parseOnce(IN_EXPRESSION)
                .sequence(TO)
                .parseOnce(IN_EXPRESSION)
                .sequence(DO)
                .parseOnce(IN_EXPRESSION)
                .sequence(DONE)
                .endNode();

        // 4e: match <expr> with <pattern matching>
        patternMatchingRecognizer(
                inState(IN_EXPRESSION)
                        .sequence(MATCH)
                        .createNode(EShallowEntityType.STATEMENT, "match-expr")
                        .parseOnce(IN_EXPRESSION)
                        .sequence(WITH))
                .endNode();

        // 4f: function <pattern matching>
        patternMatchingRecognizer(
                inState(IN_EXPRESSION)
                        .sequence(FUNCTION)
                        .createNode(EShallowEntityType.STATEMENT, "function-expr"))
                .endNode();

        // 4g: fun ... -> <expr>
        inState(IN_EXPRESSION)
                .sequence(FUN)
                .createNode(EShallowEntityType.STATEMENT, "fun-expr")
                .skipTo(MINUSGREATER)
                .parseOnce(IN_EXPRESSION)
                .endNode();

        // 4h: try <expr> with <pattern matching>
        // NOTE(review): unlike the sibling rules, this one calls endNode()
        // without a preceding createNode() - confirm that this is intended.
        RecognizerBase<EGenericParserStates> tryHead = inState(IN_EXPRESSION)
                .sequence(TRY)
                .parseOnce(IN_EXPRESSION)
                .sequence(WITH);
        patternMatchingRecognizer(tryHead).endNode();

        // 4i: object <class elements> end
        inState(IN_EXPRESSION)
                .sequence(OBJECT)
                .createNode(EShallowEntityType.STATEMENT, "object-expr")
                .parseUntil(IN_TYPE)
                .sequence(END)
                .endNode();
    }

    /**
     * Registers the non-nesting expression rules: begin/end blocks, list
     * expressions, let-in expressions and object creation with new.
     */
    private void createNonNestingExpressionRules() {
        // 4v: begin <expr> end; the rule itself creates no node.
        inState(IN_EXPRESSION)
                .sequence(BEGIN)
                .parseOnce(IN_EXPRESSION)
                .sequence(END);

        // 4n: list expressions, skipped up to the matching closing bracket.
        inState(IN_EXPRESSION)
                .sequence(LBRACK)
                .createNode(EShallowEntityType.STATEMENT, "list-expr")
                .skipToWithNesting(RBRACK, LBRACK, RBRACK)
                .endNode();

        // 4q: let [rec] name ... = <expr> in <expr>
        inState(IN_EXPRESSION)
                .sequence(LET)
                .optional(REC)
                .markStart()
                .sequence(IDENTIFIER)
                .createNode(EShallowEntityType.STATEMENT, "let-expr", 0)
                .skipTo(EQ)
                .parseOnce(IN_EXPRESSION)
                .sequence(IN)
                .parseOnce(IN_EXPRESSION)
                .endNode();

        // new A.B.C
        inState(IN_EXPRESSION)
                .sequence(NEW)
                .markStart()
                .sequence(IDENTIFIER)
                .repeated(DOT, IDENTIFIER)
                .createNode(EShallowEntityType.STATEMENT, "new-expr", 0)
                .endNode();
    }

    /**
     * Registers simplified expression rules. (TODO: general rules would be
     * left-recursive.)
     */
    private void createSimplifiedRecExpressionRules() {
        // 4s: method call of the form id # id id
        inState(IN_EXPRESSION)
                .sequence(IDENTIFIER)
                .sequence(SHARP, IDENTIFIER)
                .createNode(EShallowEntityType.STATEMENT, "method-call-expr", 0)
                .sequence(IDENTIFIER)
                .endNode();

        // 4u: unary constructor followed by an expression; creates no node.
        inState(IN_EXPRESSION)
                .sequence(getUnaryConstructors())
                .parseOnce(IN_EXPRESSION);

        // 4j: (possibly dotted) identifier
        inState(IN_EXPRESSION)
                .sequence(IDENTIFIER)
                .repeated(DOT, IDENTIFIER)
                .createNode(EShallowEntityType.STATEMENT, "id-expr", new Region(0, -1))
                .endNode();

        // 4k: constants
        inState(IN_EXPRESSION)
                .sequence(getConstantTokens())
                .createNode(EShallowEntityType.STATEMENT, "const-expr", 0)
                .endNode();

        // 4k: the constant ()
        inState(IN_EXPRESSION)
                .markStart()
                .sequence(LPAREN, RPAREN)
                .createNode(EShallowEntityType.STATEMENT, "const-expr", new Region(0, -1))
                .endNode();

        // 4l: ( <expr> ); creates no node.
        inState(IN_EXPRESSION)
                .sequence(LPAREN)
                .parseOnce(IN_EXPRESSION)
                .sequence(RPAREN);

        // 4t: ( <expr> :> A.B.C ); creates no node.
        inState(IN_EXPRESSION)
                .sequence(LPAREN)
                .parseOnce(IN_EXPRESSION)
                .sequence(COLONGREATER, IDENTIFIER)
                .repeated(DOT, IDENTIFIER)
                .sequence(RPAREN);
    }

    /**
     * Extends the given recognizer with a pattern-matching construct: skips
     * to the next <code>-&gt;</code> token and then parses one expression.
     *
     * @param head
     *            the recognizer to which the pattern-matching part is appended
     * @return the recognizer returned by the final appended step
     */
    private static RecognizerBase<EGenericParserStates> patternMatchingRecognizer(
            RecognizerBase<EGenericParserStates> head) {
        RecognizerBase<EGenericParserStates> afterArrow = head.skipTo(MINUSGREATER);
        return afterArrow.parseOnce(IN_EXPRESSION);
    }

    /**
     * Returns a set of tokens representing constant values, except () and [].
     * <p>
     * NOTE(review): IDENTIFIER is part of this set although the id-expr rule,
     * which is registered earlier, already starts with IDENTIFIER - verify
     * whether the constant rule is ever reached for identifiers.
     */
    private static EnumSet<ETokenType> getConstantTokens() {
        return EnumSet.of(TRUE, FALSE, IDENTIFIER, INTEGER_LITERAL, FLOATING_POINT_LITERAL);
    }

    /** Returns the set of token types treated as unary constructors. */
    private static EnumSet<ETokenType> getUnaryConstructors() {
        return EnumSet.of(MINUS, EXCLAMATION, REF);
    }
}