001/*-------------------------------------------------------------------------+ 002| | 003| Copyright (c) 2009-2017 CQSE GmbH | 004| | 005+-------------------------------------------------------------------------*/ 006package eu.cqse.check.framework.shallowparser.languages.oscript; 007 008import static eu.cqse.check.framework.scanner.ETokenType.ADDFEATURE; 009import static eu.cqse.check.framework.scanner.ETokenType.AND; 010import static eu.cqse.check.framework.scanner.ETokenType.ASSOC; 011import static eu.cqse.check.framework.scanner.ETokenType.BOOLEAN; 012import static eu.cqse.check.framework.scanner.ETokenType.BREAK; 013import static eu.cqse.check.framework.scanner.ETokenType.BREAKIF; 014import static eu.cqse.check.framework.scanner.ETokenType.BY; 015import static eu.cqse.check.framework.scanner.ETokenType.CASE; 016import static eu.cqse.check.framework.scanner.ETokenType.COLON; 017import static eu.cqse.check.framework.scanner.ETokenType.COMMA; 018import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE; 019import static eu.cqse.check.framework.scanner.ETokenType.CONTINUEIF; 020import static eu.cqse.check.framework.scanner.ETokenType.DATE; 021import static eu.cqse.check.framework.scanner.ETokenType.DEFAULT; 022import static eu.cqse.check.framework.scanner.ETokenType.DEFINE; 023import static eu.cqse.check.framework.scanner.ETokenType.DO; 024import static eu.cqse.check.framework.scanner.ETokenType.DOLLAR; 025import static eu.cqse.check.framework.scanner.ETokenType.DOT; 026import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE_COLON; 027import static eu.cqse.check.framework.scanner.ETokenType.DOWNTO; 028import static eu.cqse.check.framework.scanner.ETokenType.DYNAMIC; 029import static eu.cqse.check.framework.scanner.ETokenType.ELSE; 030import static eu.cqse.check.framework.scanner.ETokenType.ELSEIF; 031import static eu.cqse.check.framework.scanner.ETokenType.END; 032import static eu.cqse.check.framework.scanner.ETokenType.ENDIF; 033import static eu.cqse.check.framework.scanner.ETokenType.ENDSCRIPT; 034import static eu.cqse.check.framework.scanner.ETokenType.EOL; 035import static eu.cqse.check.framework.scanner.ETokenType.EQUAL; 036import static eu.cqse.check.framework.scanner.ETokenType.FINAL; 037import static eu.cqse.check.framework.scanner.ETokenType.FOR; 038import static eu.cqse.check.framework.scanner.ETokenType.FUNCTION; 039import static eu.cqse.check.framework.scanner.ETokenType.GE; 040import static eu.cqse.check.framework.scanner.ETokenType.GOTO; 041import static eu.cqse.check.framework.scanner.ETokenType.GT; 042import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER; 043import static eu.cqse.check.framework.scanner.ETokenType.IF; 044import static eu.cqse.check.framework.scanner.ETokenType.IFDEF; 045import static eu.cqse.check.framework.scanner.ETokenType.IFNDEF; 046import static eu.cqse.check.framework.scanner.ETokenType.IN; 047import static eu.cqse.check.framework.scanner.ETokenType.INHERITS; 048import static eu.cqse.check.framework.scanner.ETokenType.INTEGER; 049import static eu.cqse.check.framework.scanner.ETokenType.INTERFACE; 050import static eu.cqse.check.framework.scanner.ETokenType.LBRACE; 051import static eu.cqse.check.framework.scanner.ETokenType.LBRACK; 052import static eu.cqse.check.framework.scanner.ETokenType.LE; 053import static eu.cqse.check.framework.scanner.ETokenType.LIST; 054import static eu.cqse.check.framework.scanner.ETokenType.LPAREN; 055import static eu.cqse.check.framework.scanner.ETokenType.LT; 056import static eu.cqse.check.framework.scanner.ETokenType.NAME; 057import static eu.cqse.check.framework.scanner.ETokenType.NE; 058import static eu.cqse.check.framework.scanner.ETokenType.NODEBUG; 059import static eu.cqse.check.framework.scanner.ETokenType.NONE; 060import static eu.cqse.check.framework.scanner.ETokenType.NOT; 061import static eu.cqse.check.framework.scanner.ETokenType.OBJECT; 062import static eu.cqse.check.framework.scanner.ETokenType.OR; 063import static eu.cqse.check.framework.scanner.ETokenType.OVERRIDE; 064import static eu.cqse.check.framework.scanner.ETokenType.PACKAGE; 065import static eu.cqse.check.framework.scanner.ETokenType.PARENT; 066import static eu.cqse.check.framework.scanner.ETokenType.PREPROCESSOR_DIRECTIVE; 067import static eu.cqse.check.framework.scanner.ETokenType.PRIVATE; 068import static eu.cqse.check.framework.scanner.ETokenType.PUBLIC; 069import static eu.cqse.check.framework.scanner.ETokenType.RBRACE; 070import static eu.cqse.check.framework.scanner.ETokenType.RBRACK; 071import static eu.cqse.check.framework.scanner.ETokenType.REAL; 072import static eu.cqse.check.framework.scanner.ETokenType.REPEAT; 073import static eu.cqse.check.framework.scanner.ETokenType.RETURN; 074import static eu.cqse.check.framework.scanner.ETokenType.RPAREN; 075import static eu.cqse.check.framework.scanner.ETokenType.SCRIPT; 076import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON; 077import static eu.cqse.check.framework.scanner.ETokenType.SET; 078import static eu.cqse.check.framework.scanner.ETokenType.STRING; 079import static eu.cqse.check.framework.scanner.ETokenType.SUPER; 080import static eu.cqse.check.framework.scanner.ETokenType.SWITCH; 081import static eu.cqse.check.framework.scanner.ETokenType.THEN; 082import static eu.cqse.check.framework.scanner.ETokenType.THIS; 083import static eu.cqse.check.framework.scanner.ETokenType.TO; 084import static eu.cqse.check.framework.scanner.ETokenType.UNDEFINED; 085import static eu.cqse.check.framework.scanner.ETokenType.UNTIL; 086import static eu.cqse.check.framework.scanner.ETokenType.USING; 087import static eu.cqse.check.framework.scanner.ETokenType.VOID; 088import static eu.cqse.check.framework.scanner.ETokenType.WHILE; 089import static eu.cqse.check.framework.shallowparser.SubTypeNames.ASSIGNMENT; 090import static eu.cqse.check.framework.shallowparser.SubTypeNames.GLOBAL_VARIABLE; 091import static eu.cqse.check.framework.shallowparser.SubTypeNames.LOCAL_VARIABLE; 092import static eu.cqse.check.framework.shallowparser.SubTypeNames.SIMPLE_STATEMENT; 093import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.ATTRIBUTE; 094import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.META; 095import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.METHOD; 096import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.MODULE; 097import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.STATEMENT; 098import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.TYPE; 099import static eu.cqse.check.framework.shallowparser.languages.oscript.EOScriptParserState.IN_FUNCTION; 100import static eu.cqse.check.framework.shallowparser.languages.oscript.EOScriptParserState.IN_OBJECT; 101import static eu.cqse.check.framework.shallowparser.languages.oscript.EOScriptParserState.IN_SCRIPT; 102import static eu.cqse.check.framework.shallowparser.languages.oscript.EOScriptParserState.TOP_LEVEL; 103 104import java.util.EnumSet; 105 106import org.conqat.lib.commons.region.Region; 107 108import eu.cqse.check.framework.scanner.ETokenType; 109import eu.cqse.check.framework.shallowparser.SubTypeNames; 110import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType; 111import eu.cqse.check.framework.shallowparser.framework.RecognizerBase; 112import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase; 113 114/** 115 * Shallow Parser for the OpenText OScript language 116 */ 117public class OScriptShallowParser extends ShallowParserBase<EOScriptParserState> { 118 119 /** Available types for variables */ 120 private static final EnumSet<ETokenType> VARIABLE_TYPES = EnumSet.of(BOOLEAN, INTEGER, REAL, STRING, OBJECT, ASSOC, 121 LIST, DATE, DYNAMIC, IDENTIFIER); 122 123 /** Tokens indicating end of stataments */ 124 private static final EnumSet<ETokenType> STATEMENT_END_TOKENS = EnumSet.of(EOL, SEMICOLON); 125 126 /** Set of all OScript built-in keyword identifiers */ 127 private static final EnumSet<ETokenType> OSCRIPT_IDENTIFIERS = EnumSet.of(IDENTIFIER, ADDFEATURE, AND, ASSOC, 128 BOOLEAN, BREAK, BREAKIF, BY, CASE, CONTINUE, CONTINUEIF, DATE, DEFAULT, DEFINE, DO, DOWNTO, DYNAMIC, ELSE, 129 ELSEIF, END, ENDIF, ENDSCRIPT, FINAL, FOR, FUNCTION, GE, GOTO, GT, IF, IFDEF, IFNDEF, IN, INHERITS, INTEGER, 130 INTERFACE, LE, LIST, LT, NAME, NE, NODEBUG, NONE, NOT, OBJECT, OR, OVERRIDE, PACKAGE, PARENT, PRIVATE, 131 PUBLIC, REAL, REPEAT, RETURN, SCRIPT, SET, STRING, SUPER, SWITCH, THEN, THIS, TO, UNDEFINED, UNTIL, USING, 132 WHILE); 133 134 /** Possible modifier for function or attributes */ 135 private static final EnumSet<ETokenType> MODIFIER = EnumSet.of(OVERRIDE, PUBLIC, PRIVATE); 136 137 /** Constructor. */ 138 public OScriptShallowParser() { 139 super(EOScriptParserState.class, EOScriptParserState.TOP_LEVEL); 140 createTopLevelRules(); 141 createVariableRules(); 142 createFunctionRules(); 143 createScriptRules(); 144 createRulesForStatements(); 145 } 146 147 /** 148 * Rules for every variable declaration and usage. 149 */ 150 private void createVariableRules() { 151 // Global variables 152 inState(TOP_LEVEL).subRecognizer(getTypeRecognizer()).subRecognizer(getVariableRecognizer()) 153 .createNode(ATTRIBUTE, GLOBAL_VARIABLE, 0).endNode(); 154 155 // Attributes 156 inState(IN_OBJECT, IN_SCRIPT).optional(MODIFIER).optionalSubRecognizer(getTypeRecognizer()) 157 .subRecognizer(getVariableRecognizer()).createNode(ATTRIBUTE, SubTypeNames.ATTRIBUTE, 0).endNode(); 158 159 // Local variables 160 inState(IN_FUNCTION).subRecognizer(getTypeRecognizer()).subRecognizer(getVariableRecognizer()) 161 .createNode(STATEMENT, LOCAL_VARIABLE, 0).endNode(); 162 inState(IN_FUNCTION).subRecognizer(getVariableNameRecognizer()).subRecognizer(getDeclarationRecognizer()) 163 .createNode(STATEMENT, LOCAL_VARIABLE, 0).endNode(); 164 165 // Assignments 166 inAnyState().subRecognizer(getVariableNameRecognizer()).subRecognizer(getAssignmentRecognizer()) 167 .createNode(STATEMENT, ASSIGNMENT, 0).endNode(); 168 } 169 170 /** 171 * Recognizer for the usage of a variable. It checks if it is a valid variable 172 * name and end. 173 */ 174 private RecognizerBase<EOScriptParserState> getVariableRecognizer() { 175 return createRecognizer( 176 start -> start.subRecognizer(getVariableNameRecognizer()).subRecognizer(getStatementEndRecognizer())); 177 } 178 179 /** 180 * Recognizer for the name or way a variable is adressed. This also includes 181 * stuff like: test.test().test 182 */ 183 private RecognizerBase<EOScriptParserState> getVariableNameRecognizer() { 184 return createRecognizer(start -> { 185 RecognizerBase<EOScriptParserState> prefix = start.optional(EnumSet.of(DOLLAR, DOT)).markStart() 186 .optionalSubRecognizer(getMethodCallRecognizer(false)).optional(DOT).repeated(IDENTIFIER, DOT); 187 188 // Munich RE's scripts has some weird object property assignment 189 // <code>a.(b)=c<code> 190 prefix.repeated(IDENTIFIER, DOT).optional(IDENTIFIER, DOT).sequence(LPAREN).skipToWithNesting(RPAREN, 191 LPAREN, RPAREN); 192 prefix.sequence(IDENTIFIER); 193 }); 194 } 195 196 /** Recognizer for a variable declaration */ 197 private RecognizerBase<EOScriptParserState> getDeclarationRecognizer() { 198 return createRecognizer(start -> { 199 start.sequence(COMMA); 200 start.sequence(STATEMENT_END_TOKENS); 201 }); 202 } 203 204 /** Recognizer for an assignment starting with an '='. */ 205 private RecognizerBase<EOScriptParserState> getAssignmentRecognizer() { 206 return createRecognizer(start -> { 207 RecognizerBase<EOScriptParserState> prefix = start.sequence(EQUAL); 208 prefix.subRecognizer(getMethodCallRecognizer(false)).repeated(DOT, IDENTIFIER); 209 prefix.sequence(LBRACE).skipToWithNesting(RBRACE, LBRACE, RBRACE); 210 prefix.sequence(LBRACK).skipToWithNesting(RBRACK, LBRACK, RBRACK); 211 prefix.skipTo(STATEMENT_END_TOKENS); 212 }); 213 } 214 215 /** 216 * Returns a recognizer which identifies if it is a valid statement end. This 217 * recognizer starts after the variable identifier. 218 * 219 * String a -->| = 3 (so here it would recognize every valid part which can come 220 * after "String a" 221 */ 222 private RecognizerBase<EOScriptParserState> getStatementEndRecognizer() { 223 return createRecognizer(start -> { 224 start.subRecognizer(getDeclarationRecognizer()); 225 start.subRecognizer(getAssignmentRecognizer()); 226 }); 227 } 228 229 /** 230 * A recognizer for the declaration of a variable with possible modifier and 231 * type. Stops before the identifier. 232 */ 233 private RecognizerBase<EOScriptParserState> getTypeRecognizer() { 234 return createRecognizer(start -> start.optional(MODIFIER).sequence(VARIABLE_TYPES).sequenceBefore(IDENTIFIER)); 235 } 236 237 /** 238 * Recognizer for a method call. This covers also multiple chained method calls 239 * and calls over multiple lines. 240 * 241 * For example: test.test().test(\n, a,\n b) 242 */ 243 private RecognizerBase<EOScriptParserState> getMethodCallRecognizer(boolean markStart) { 244 return createRecognizer(start -> { 245 if (markStart) { 246 start.optional(EnumSet.of(DOLLAR, DOT)).markStart().subRecognizer(getSimpleMethodCallRecognizer(), 1, 247 Integer.MAX_VALUE); 248 } else { 249 start.optional(EnumSet.of(DOLLAR, DOT)).subRecognizer(getSimpleMethodCallRecognizer(), 1, 250 Integer.MAX_VALUE); 251 } 252 }); 253 } 254 255 /** 256 * Recognizer for a simple method call like .test(), $test() or test() 257 */ 258 private RecognizerBase<EOScriptParserState> getSimpleMethodCallRecognizer() { 259 return createRecognizer(start -> start.optional(DOT).repeated(IDENTIFIER, DOT).sequence(IDENTIFIER, LPAREN) 260 .skipToWithNesting(RPAREN, LPAREN, RPAREN)); 261 } 262 263 /** 264 * Rules for top-level declarations: package and object statements. These are 265 * according to the new source format. 266 */ 267 private void createTopLevelRules() { 268 // Package 269 inState(TOP_LEVEL).sequence(PACKAGE).skipTo(STATEMENT_END_TOKENS) 270 .createNode(MODULE, SubTypeNames.PACKAGE, new Region(1, -2)).endNode(); 271 272 // Object 273 inState(TOP_LEVEL).sequence(PUBLIC, OBJECT, IDENTIFIER).markStart().createNode(TYPE, SubTypeNames.OBJECT, -1) 274 .optional(INHERITS, IDENTIFIER).repeated(DOUBLE_COLON, IDENTIFIER).parseUntil(IN_OBJECT).sequence(END) 275 .endNode(); 276 277 inState(TOP_LEVEL).sequence(EnumSet.of(NAME, PARENT, ADDFEATURE)).skipTo(STATEMENT_END_TOKENS) 278 .createNode(META, 0).endNode(); 279 280 // Preprocessor directives 281 inAnyState().sequence(PREPROCESSOR_DIRECTIVE).createNode(EShallowEntityType.META, 0).endNode(); 282 } 283 284 /** Rule for a script */ 285 private void createScriptRules() { 286 inState(TOP_LEVEL, IN_OBJECT).optional(EnumSet.of(OVERRIDE, PUBLIC, PRIVATE)) 287 .sequence(SCRIPT, OSCRIPT_IDENTIFIERS).createNode(TYPE, SubTypeNames.SCRIPT, -1).parseUntil(IN_SCRIPT) 288 .sequence(ENDSCRIPT).endNode(); 289 } 290 291 /** Rule for a function */ 292 private void createFunctionRules() { 293 inState(TOP_LEVEL, IN_OBJECT, IN_SCRIPT).optional(MODIFIER) 294 .sequence(FUNCTION, 295 EnumSet.of(BOOLEAN, INTEGER, REAL, STRING, OBJECT, ASSOC, LIST, DATE, DYNAMIC, VOID), 296 OSCRIPT_IDENTIFIERS, LPAREN) 297 .createNode(METHOD, SubTypeNames.FUNCTION, 2).skipToWithNesting(RPAREN, LPAREN, RPAREN) 298 .parseUntil(IN_FUNCTION).sequence(END).endNode(); 299 } 300 301 /** Rules for function and script statements */ 302 private void createRulesForStatements() { 303 createRulesForConditionStatements(); 304 createRulesForLoopStatements(); 305 306 // Labels and goto jumps 307 inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(IDENTIFIER, COLON).createNode(STATEMENT, SubTypeNames.LABEL) 308 .optional(STATEMENT_END_TOKENS).endNode(); 309 inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(GOTO, IDENTIFIER).createNode(STATEMENT, -1) 310 .optional(STATEMENT_END_TOKENS).endNode(); 311 312 // Simple keywords 313 inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(EnumSet.of(RETURN, BREAK, CONTINUE, BREAKIF, CONTINUEIF)) 314 .createNode(STATEMENT, 0).skipNested(LPAREN, RPAREN).skipTo(STATEMENT_END_TOKENS).endNode(); 315 316 // Label 317 inAnyState().sequence(IDENTIFIER, COLON).createNode(STATEMENT, SubTypeNames.LABEL, 0) 318 .skipTo(STATEMENT_END_TOKENS).endNode(); 319 320 // Method calls 321 inAnyState().subRecognizer(getMethodCallRecognizer(true)).createNode(STATEMENT, SIMPLE_STATEMENT, 0).endNode(); 322 } 323 324 /** Create rules for condition statements */ 325 private void createRulesForConditionStatements() { 326 // If Statement 327 createIfRules(IF); 328 createIfRules(ELSE); 329 createIfRules(ELSEIF); 330 331 // Switch 332 inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(SWITCH).createNode(STATEMENT, 0) 333 .skipToWithNesting(STATEMENT_END_TOKENS, LPAREN, RPAREN).parseUntil(IN_FUNCTION).sequence(END) 334 .endNode(); 335 inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(CASE) 336 .skipToWithNesting(STATEMENT_END_TOKENS, LPAREN, RPAREN) 337 .createNode(EShallowEntityType.META, new int[] { 0, 1 }).parseUntil(IN_FUNCTION).sequence(END) 338 .endNode(); 339 inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(DEFAULT, STATEMENT_END_TOKENS) 340 .createNode(EShallowEntityType.META, 0).parseUntil(IN_FUNCTION).sequence(END).endNode(); 341 } 342 343 /** Create rules for If/else-if conditions */ 344 private void createIfRules(ETokenType conditionToken) { 345 RecognizerBase<EOScriptParserState> ifRecognizer = inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT) 346 .sequence(conditionToken).skipToWithNesting(STATEMENT_END_TOKENS, LPAREN, RPAREN) 347 .createNode(STATEMENT, 0).parseUntil(IN_FUNCTION); 348 if (conditionToken == ELSE) { 349 ifRecognizer.sequence(END).endNode(); 350 return; 351 } 352 353 ifRecognizer.sequenceBefore(EnumSet.of(ELSE, ELSEIF)).endNodeWithContinuation(); 354 ifRecognizer.sequence(END).endNode(); 355 } 356 357 /** Create rules for loop statements */ 358 private void createRulesForLoopStatements() { 359 // While 360 inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(WHILE).createNode(STATEMENT, 0).skipNested(LPAREN, RPAREN) 361 .parseUntil(IN_FUNCTION).sequence(END).endNode(); 362 363 // Repeat 364 inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(REPEAT).createNode(STATEMENT, 0).parseUntil(IN_FUNCTION) 365 .sequence(WHILE).skipNested(LPAREN, RPAREN).endNode(); 366 367 // For...each, C-style and structured for loops 368 RecognizerBase<EOScriptParserState> forRecognizer = inState(TOP_LEVEL, IN_FUNCTION, IN_SCRIPT).sequence(FOR) 369 .markStart(); 370 forRecognizer.sequence(IDENTIFIER).createNode(STATEMENT, -2).skipNested(LPAREN, RPAREN) 371 .skipTo(STATEMENT_END_TOKENS).parseUntil(IN_FUNCTION).sequence(END).endNode(); 372 forRecognizer.skipNested(LPAREN, RPAREN).createNode(STATEMENT, -3).parseUntil(IN_FUNCTION).sequence(END) 373 .endNode(); 374 } 375 376}