/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package eu.cqse.check.framework.shallowparser.languages.plsql;

import static eu.cqse.check.framework.scanner.ETokenType.*;
import static eu.cqse.check.framework.shallowparser.SubTypeNames.IFS_SEARCH_BLOCK;
import static eu.cqse.check.framework.shallowparser.SubTypeNames.PREPROCESSOR_CONDITION;
import static eu.cqse.check.framework.shallowparser.languages.plsql.PlsqlShallowParser.EPlsqlParserStates.DECLARATIONS;
import static eu.cqse.check.framework.shallowparser.languages.plsql.PlsqlShallowParser.EPlsqlParserStates.STATEMENTS;

import java.util.Arrays;
import java.util.EnumSet;
import java.util.Locale;

import org.conqat.lib.commons.region.Region;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
import eu.cqse.check.framework.shallowparser.framework.SequenceRecognizer.ITokenMatcher;
import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;

/**
 * Shallow parser for PL/SQL.
 * <p>
 * All parsing rules are registered in the constructor, grouped into IFS
 * preprocessor rules, meta rules, package/type rules, method/attribute rules
 * and statement rules. The registration order is kept exactly as is, because
 * several rules overlap (e.g. {@code LOOP} and {@code WHILE} are also part of
 * {@link #PLSQL_IDENTIFIERS}, which starts the generic "basic statement" rule).
 */
public class PlsqlShallowParser extends ShallowParserBase<PlsqlShallowParser.EPlsqlParserStates> {

    /** The states used in this parser. */
    public enum EPlsqlParserStates {

        /** A state to recognize declarations. */
        DECLARATIONS,

        /** A state to recognize statements. */
        STATEMENTS
    }

    /**
     * In PL/SQL all keywords may also be used as identifiers. There are also
     * reserved words, which are not allowed as identifiers. This set lists the
     * token types that the rules below accept wherever a name is expected.
     */
    private static final EnumSet<ETokenType> PLSQL_IDENTIFIERS = EnumSet.of(IDENTIFIER, A, ADD, AGENT, AGGREGATE, ARRAY,
            ATTRIBUTE, AUTHID, AVG, BFILE_BASE, BINARY, BLOB_BASE, BLOCK, BODY, BOTH, BOUND, BULK, BYTE, C, CALL,
            CALLING, CASCADE, CHAR, CHAR_BASE, CHARACTER, CHARSETFORM, CHARSETID, CHARSET, CLOB_BASE, CLOSE, COLLECT,
            COMMENT, COMMIT, COMMITTED, COMPILED, CONSTANT, CONSTRUCTOR, CONTEXT, CONTINUE, CONVERT, COUNT, CURSOR,
            CUSTOMDATUM, DANGLING, DATA, DATE, DATE_BASE, DAY, DEFINE, DETERMINISTIC, DOUBLE, DURATION, ELEMENT, ELSIF,
            EMPTY, ESCAPE, EXCEPT, EXCEPTIONS, EXECUTE, EXIT, EXTERNAL, FINAL, FIXED, FLOAT, FORALL, FORCE, FUNCTION,
            GENERAL, HASH, HEAP, HIDDEN, HOUR, IMMEDIATE, INCLUDING, INDICATOR, INDICES, INFINITE, INSTANTIABLE, INT,
            INTERFACE, INTERVAL, INVALIDATE, ISOLATION, JAVA, LANGUAGE, LARGE, LEADING, LENGTH, LEVEL, LIBRARY, LIKE2,
            LIKE4, LIKEC, LIMIT, LIMITED, LOCAL, LONG, LOOP, MAP, MAX, MAXLEN, MEMBER, MERGE, MIN, MINUTE, MOD, MODIFY,
            MONTH, MULTISET, NAME, NAN, NATIONAL, NATIVE, NCHAR, NEW, NOCOPY, NUMBER_BASE, OBJECT, OCICOLL, OCIDATETIME,
            OCIDATE, OCIDURATION, OCIINTERVAL, OCILOBLOCATOR, OCINUMBER, OCIRAW, OCIREFCURSOR, OCIREF, OCIROWID,
            OCISTRING, OCITYPE, ONLY, OPAQUE, OPEN, OPERATOR, ORACLE, ORADATA, ORGANIZATION, ORLANY, ORLVARY, OTHERS,
            OUT, OVERRIDING, PACKAGE, PARALLEL_ENABLE, PARAMETER, PARAMETERS, PARTITION, PASCAL, PIPE, PIPELINED,
            PRAGMA, PRECISION, PRIVATE, RAISE, RANGE, RAW, READ, RECORD, REF, REFERENCE, RELIES_ON, REM, REMAINDER,
            RENAME, REPLACE, RESULT, RESULT_CACHE, RETURN, RETURNING, REVERSE, ROLLBACK, ROW, SAMPLE, SAVE, SAVEPOINT,
            SB1, SB2, SB4, SECOND, SEGMENT, SELF, SEPARATE, SEQUENCE, SERIALIZABLE, SET, SHORT, SIZE_T, SOME, SPARSE,
            SQLCODE, SQLDATA, SQLNAME, SQLSTATE, STANDARD, STATIC, STDDEV, STORED, STRING, STRUCT, STYLE, SUBMULTISET,
            SUBPARTITION, SUBSTITUTABLE, SUBTYPE, SUM, SYNONYM, TDO, THE, TIME, TIMESTAMP, TIMEZONE_ABBR, TIMEZONE_HOUR,
            TIMEZONE_MINUTE, TIMEZONE_REGION, TRAILING, TRANSACTION, TRANSACTIONAL, TRUSTED, TYPE, UB1, UB2, UB4, UNDER,
            UNSIGNED, UNTRUSTED, USE, USING, VALIST, VALUE, VARIABLE, VARIANCE, VARRAY, VARYING, VOID, WHILE, WORK,
            WRAPPED, WRITE, YEAR, ZONE,
            // these are not "official" keywords, but may be used as identifiers
            // as well
            DELETE, ON, OFF);

    /**
     * Constructor. Uses {@link EPlsqlParserStates#DECLARATIONS} as the state
     * passed to the base class and registers all rule groups. The IFS
     * preprocessor rules are registered once per parser state.
     */
    public PlsqlShallowParser() {
        super(EPlsqlParserStates.class, DECLARATIONS);

        createIfsPreprocessorRules(DECLARATIONS);
        createIfsPreprocessorRules(STATEMENTS);
        createMetaRules();
        createPackageAndTypeRules();
        createMethodAndAttributeRules();
        createStatementRules();
    }

    /**
     * Create rules for IFS preprocessor directives. Since these are interpreted by
     * the preprocessor, they do not change the parser state.
     *
     * @param state
     *            the state in which the rules are registered; nested content is
     *            parsed in this same state.
     */
    private void createIfsPreprocessorRules(EPlsqlParserStates state) {
        // conditional: IF ... THEN <content>, closed by END or continued by ELSE
        RecognizerBase<EPlsqlParserStates> ifAlternative = inState(state).sequence(IFS_IF)
                .createNode(EShallowEntityType.META, PREPROCESSOR_CONDITION, -1).skipTo(IFS_THEN).parseUntil(state)
                .sequenceBefore(EnumSet.of(IFS_ELSE, IFS_END));
        ifAlternative.sequence(IFS_END).endNode();
        ifAlternative.endNodeWithContinuation();

        // ELSE branch of a conditional
        inState(state).sequence(IFS_ELSE).createNode(EShallowEntityType.META, PREPROCESSOR_CONDITION, -1)
                .parseUntil(state).sequence(IFS_END).endNode();

        // PREPEND block, which is continued by a SEARCH block
        inState(state).sequence(IFS_PREPEND).createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1)
                .parseStrictlyUntil(state).sequenceBefore(IFS_SEARCH).endNodeWithContinuation();

        // SEARCH block: its content is skipped (not parsed); IF/END pairs inside
        // are treated as nesting
        RecognizerBase<EPlsqlParserStates> searchAlternative = inState(state).sequence(IFS_SEARCH)
                .createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1)
                .skipBeforeWithNesting(EnumSet.of(IFS_END, IFS_APPEND, IFS_REPLACE), IFS_IF, IFS_END);
        searchAlternative.sequence(IFS_END).endNode();
        searchAlternative.endNodeWithContinuation();

        // APPEND/REPLACE block following a SEARCH block
        inState(state).sequence(EnumSet.of(IFS_APPEND, IFS_REPLACE))
                .createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1).parseStrictlyUntil(state)
                .skipToWithNesting(IFS_END, IFS_IF, IFS_END).endNode();

        // TEXTPREPEND block, which is continued by a TEXTSEARCH block
        inState(state).sequence(IFS_TEXTPREPEND).createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1)
                .parseStrictlyUntil(state).sequenceBefore(IFS_TEXTSEARCH).endNodeWithContinuation();

        // TEXTSEARCH block: content is skipped up to TEXTEND/TEXTAPPEND/TEXTREPLACE
        RecognizerBase<EPlsqlParserStates> textsearchAlternative = inState(state).sequence(IFS_TEXTSEARCH)
                .createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1)
                .skipBefore(EnumSet.of(IFS_TEXTEND, IFS_TEXTAPPEND, IFS_TEXTREPLACE));
        textsearchAlternative.sequence(IFS_TEXTEND).endNode();
        textsearchAlternative.endNodeWithContinuation();

        // TEXTAPPEND/TEXTREPLACE block, closed by TEXTEND
        inState(state).sequence(EnumSet.of(IFS_TEXTAPPEND, IFS_TEXTREPLACE))
                .createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1).parseStrictlyUntil(state)
                .sequence(IFS_TEXTEND).endNode();
    }

    /** Create rules for parsing meta elements. */
    private void createMetaRules() {

        // SHOW ERROR is tricky, as it allows for multiple abbreviations
        // NOTE(review): the prefix matchers accept any identifier starting with
        // "sho" / "err" (e.g. "shopping"), not only abbreviations of SHOW / ERRORS.
        inState(DECLARATIONS).sequence(new IdentifierPrefixMatcher("sho"), new IdentifierPrefixMatcher("err"))
                .optional(SEMICOLON).createNode(EShallowEntityType.META, "show errors").endNode();

        // exit
        inState(DECLARATIONS).sequence(EXIT).optional(INTEGER_LITERAL).optional(SEMICOLON)
                .createNode(EShallowEntityType.META, "exit").endNode();

        // pragma
        inAnyState().sequence(PRAGMA).createNode(EShallowEntityType.META, "pragma").skipTo(SEMICOLON).endNode();

        // exception section
        inState(STATEMENTS).sequence(EXCEPTION).createNode(EShallowEntityType.META, "exception section").endNode();

        // IFS Override Annotation for Procedures
        inState(DECLARATIONS).sequence(OVERRIDE).createNode(EShallowEntityType.META, -1).endNode();
        inState(DECLARATIONS).sequence(OVERTAKE).createNode(EShallowEntityType.META, "@overtake", -1).endNode();
        inState(DECLARATIONS).sequence(ANNOTATION).createNode(EShallowEntityType.META, "annotation", -1).endNode();

        // single and double 'at' sign execution; see
        // http://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm#BACIEHDJ
        inState(DECLARATIONS).sequence(AT).optional(AT).createNode(EShallowEntityType.META, "run script")
                .repeated(EnumSet.of(IDENTIFIER, DOT, MINUS, MOD)).endNode();

        // SQL statements
        inAnyState()
                .sequence(EnumSet.of(AGGREGATE, ALTER, COMMIT, DELETE, GRANT, INSERT, LOCK, ROLLBACK, SAVEPOINT, SELECT,
                        DROP, MERGE, UPDATE))
                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.SQL, 0).skipTo(SEMICOLON).endNode();
        inAnyState().sequence(SET, TRANSACTION).createNode(EShallowEntityType.STATEMENT, SubTypeNames.SQL, 0)
                .skipTo(SEMICOLON).endNode();
        inAnyState().sequence(CREATE).optional(OR, REPLACE).optional(PUBLIC).sequence(SYNONYM)
                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.SQL, 0).skipTo(SEMICOLON).endNode();

        // set
        EnumSet<ETokenType> setIdentifiers = EnumSet.of(IDENTIFIER, DEFINE);
        inState(DECLARATIONS).sequence(SET, setIdentifiers, PLSQL_IDENTIFIERS)
                .createNode(EShallowEntityType.META, "set").repeated(setIdentifiers, PLSQL_IDENTIFIERS)
                .optional(SEMICOLON).endNode();

        // deal with dangling end by inserting broken node; the rule is
        // intentionally never completed, i.e. endNode() omitted!
        inAnyState().sequence(END).createNode(EShallowEntityType.META, "dangling end").skipTo(SEMICOLON);
    }

    /** Creates parsing rules for packages and types. */
    private void createPackageAndTypeRules() {

        // packages (specification and body); the initialization part introduced
        // by BEGIN is optional
        RecognizerBase<EPlsqlParserStates> optionalBeginAlternative = createOrReplace().sequence(PACKAGE).optional(BODY)
                .markStart().repeated(PLSQL_IDENTIFIERS, DOT).sequence(PLSQL_IDENTIFIERS)
                .createNode(EShallowEntityType.MODULE, "package", new Region(0, -1)).skipTo(EnumSet.of(IS, AS))
                .parseUntil(DECLARATIONS);
        optionalBeginAlternative.sequence(BEGIN).parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
        optionalBeginAlternative.sequence(END).skipTo(SEMICOLON).endNode();

        // type body
        createOrReplace().sequence(TYPE, BODY).markStart().repeated(PLSQL_IDENTIFIERS, DOT).sequence(PLSQL_IDENTIFIERS)
                .createNode(EShallowEntityType.MODULE, "type body", new Region(0, -1)).skipTo(EnumSet.of(IS, AS))
                .parseUntil(DECLARATIONS).sequence(END).skipTo(SEMICOLON).endNode();

        // type
        RecognizerBase<EPlsqlParserStates> typeMatcher = createOrReplace().markStart().sequence(TYPE)
                .repeated(PLSQL_IDENTIFIERS, DOT).sequence(PLSQL_IDENTIFIERS)
                .createNode(EShallowEntityType.TYPE, 0, new Region(1, -1))
                .skipBefore(EnumSet.of(IS, AS, UNDER, SEMICOLON, DIV));
        // type ends directly with ';' or '/'
        typeMatcher.sequence(EnumSet.of(SEMICOLON, DIV)).endNode();
        RecognizerBase<EPlsqlParserStates> typeMatcher2 = typeMatcher.skipBefore(EnumSet.of(IS, AS, UNDER))
                .optional(EnumSet.of(IS, AS));
        // object types / subtypes (UNDER) have a parenthesized member list that is
        // parsed as declarations
        typeMatcher2.sequence(EnumSet.of(OBJECT, UNDER)).skipTo(LPAREN).parseUntil(DECLARATIONS)
                // closing paren is swallowed by decl rules
                .repeated(EnumSet.of(NOT, FINAL, INSTANTIABLE)).sequence(EnumSet.of(SEMICOLON, DIV)).endNode();
        // any other type definition is skipped up to its terminator
        typeMatcher2.skipTo(EnumSet.of(SEMICOLON, DIV)).endNode();

        // top-level code block
        inState(DECLARATIONS).sequence(BEGIN).createNode(EShallowEntityType.METHOD, "top-level code")
                .parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
    }

    /** Create parser rules for functions, procedures, constructors, etc. */
    private void createMethodAndAttributeRules() {

        // function/procedure
        RecognizerBase<EPlsqlParserStates> methodStart = createOrReplace()
                .repeated(EnumSet.of(MAP, NOT, OVERRIDING, ORDER, FINAL, INSTANTIABLE, MEMBER, STATIC, CONSTRUCTOR))
                .markStart().sequence(EnumSet.of(PROCEDURE, FUNCTION)).repeated(PLSQL_IDENTIFIERS, DOT)
                .sequence(PLSQL_IDENTIFIERS).createNode(EShallowEntityType.METHOD, 0, new Region(1, -1))
                .skipNested(LPAREN, RPAREN).optional(RETURN, SELF, AS, RESULT)
                .skipBefore(EnumSet.of(SEMICOLON, IS, AS, RPAREN, COMMA));
        // declaration without body
        methodStart.sequence(EnumSet.of(SEMICOLON, RPAREN, COMMA)).endNode();

        RecognizerBase<EPlsqlParserStates> methodStart2 = methodStart.sequence(EnumSet.of(IS, AS));
        // IS/AS followed by LANGUAGE or EXTERNAL: skipped up to the semicolon
        methodStart2.sequence(EnumSet.of(LANGUAGE, EXTERNAL)).skipTo(SEMICOLON).endNode();
        // regular body: declarations, then BEGIN ... END
        methodStart2.parseUntil(DECLARATIONS).sequence(BEGIN).parseUntil(STATEMENTS).sequence(END)
                .skipToWithNesting(EnumSet.of(SEMICOLON, RPAREN, COMMA), LPAREN, RPAREN).endNode();

        // view
        createOrReplace().markStart().sequence(VIEW, PLSQL_IDENTIFIERS, AS)
                .createNode(EShallowEntityType.ATTRIBUTE, 0, new Region(1, -2)).skipTo(SEMICOLON).endNode();

        // exception declaration
        inState(DECLARATIONS).sequence(PLSQL_IDENTIFIERS, EXCEPTION, SEMICOLON)
                .createNode(EShallowEntityType.META, "exception declaration", 0).endNode();

        // trigger
        RecognizerBase<EPlsqlParserStates> triggerMatch = createOrReplace().markStart().sequence(TRIGGER)
                .repeated(PLSQL_IDENTIFIERS, DOT).sequence(PLSQL_IDENTIFIERS)
                .createNode(EShallowEntityType.METHOD, 0, new Region(1, -1))
                .skipBefore(EnumSet.of(SEMICOLON, DECLARE, BEGIN));
        triggerMatch.sequence(SEMICOLON).endNode();
        triggerMatch.sequence(BEGIN).parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
        triggerMatch.sequence(DECLARE).parseUntil(DECLARATIONS).sequence(BEGIN).parseUntil(STATEMENTS).sequence(END)
                .skipTo(SEMICOLON).endNode();

        // cursor declaration
        inState(DECLARATIONS).sequence(CURSOR).skipTo(SEMICOLON)
                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.CURSOR, 1).endNode();

        // variables and constants
        inState(DECLARATIONS).sequence(PLSQL_IDENTIFIERS).createNode(EShallowEntityType.ATTRIBUTE, "variable", 0)
                .skipToWithNesting(EnumSet.of(SEMICOLON, RPAREN, COMMA), LPAREN, RPAREN).endNode();
    }

    /**
     * Matches the optional CREATE OR REPLACE clause.
     *
     * @return a recognizer in the {@link EPlsqlParserStates#DECLARATIONS} state
     *         that optionally consumes {@code CREATE} and {@code OR REPLACE}, to
     *         be extended by the caller.
     */
    private RecognizerBase<EPlsqlParserStates> createOrReplace() {
        return inState(DECLARATIONS).optional(CREATE).optional(OR, REPLACE);
    }

    /** Creates parser rules for statements. */
    private void createStatementRules() {
        // if/elsif
        RecognizerBase<EPlsqlParserStates> ifAlternative = inState(STATEMENTS).sequence(EnumSet.of(IF, ELSIF))
                .createNode(EShallowEntityType.STATEMENT, 0).skipToWithNesting(THEN, CASE, END).parseUntil(STATEMENTS)
                .sequenceBefore(EnumSet.of(ELSIF, ELSE, END));
        ifAlternative.sequence(END, IF, SEMICOLON).endNode();
        ifAlternative.endNodeWithContinuation();

        // else (both for if and case)
        RecognizerBase<EPlsqlParserStates> elseMatcher = inState(STATEMENTS).sequence(ELSE)
                .createNode(EShallowEntityType.STATEMENT, 0).parseUntil(STATEMENTS);
        elseMatcher.sequence(END, IF).skipTo(SEMICOLON).endNode();
        // for case, END CASE is only looked at here; it is consumed by the case rule
        elseMatcher.sequenceBefore(END, CASE).endNode();

        // loops
        inState(STATEMENTS).sequence(LOOP).createNode(EShallowEntityType.STATEMENT, 0).parseUntil(STATEMENTS)
                .sequence(END, LOOP).skipTo(SEMICOLON).endNode();

        inState(STATEMENTS).sequence(EnumSet.of(WHILE, FOR)).createNode(EShallowEntityType.STATEMENT, 0).skipTo(LOOP)
                .parseUntil(STATEMENTS).sequence(END, LOOP).skipTo(SEMICOLON).endNode();

        // blocks
        inState(STATEMENTS).sequence(DECLARE).createNode(EShallowEntityType.STATEMENT, "block").parseUntil(DECLARATIONS)
                .sequence(BEGIN).parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
        inState(STATEMENTS).sequence(BEGIN).createNode(EShallowEntityType.STATEMENT, "block").parseUntil(STATEMENTS)
                .sequence(END).skipTo(SEMICOLON).endNode();

        // case
        inState(STATEMENTS).sequence(CASE).createNode(EShallowEntityType.STATEMENT, 0)
                .skipBefore(EnumSet.of(WHEN, ELSE)).parseUntil(STATEMENTS).sequence(END, CASE).skipTo(SEMICOLON)
                .endNode();

        // when (in exceptions or case)
        inState(STATEMENTS).sequence(WHEN).skipTo(THEN).createNode(EShallowEntityType.META, "when", 1).endNode();

        // labels
        inState(STATEMENTS).sequence(LEFT_LABEL_BRACKET, PLSQL_IDENTIFIERS, RIGHT_LABEL_BRACKET)
                .createNode(EShallowEntityType.META, SubTypeNames.LABEL, 1).endNode();

        // basic statement; works on a copy so the shared identifier set is not
        // modified
        EnumSet<ETokenType> basicStatementStarts = EnumSet.copyOf(PLSQL_IDENTIFIERS);
        basicStatementStarts.addAll(Arrays.asList(SELF, RETURN, GOTO, FETCH, NULL_LITERAL));
        inState(STATEMENTS).sequence(basicStatementStarts).createNode(EShallowEntityType.STATEMENT, 0).skipTo(SEMICOLON)
                .endNode();
    }

    /**
     * Matcher for identifiers by a prefix. A token matches if its type is
     * {@code IDENTIFIER} and its text starts with the prefix, ignoring case.
     * Case folding uses {@link Locale#ROOT}, so the result does not depend on
     * the default locale of the JVM.
     */
    private static class IdentifierPrefixMatcher implements ITokenMatcher {

        /** The prefix (stored in lower case). */
        private final String prefix;

        /**
         * Constructor.
         *
         * @param prefix
         *            the prefix an identifier has to start with; case is ignored.
         */
        public IdentifierPrefixMatcher(String prefix) {
            this.prefix = prefix.toLowerCase(Locale.ROOT);
        }

        /** {@inheritDoc} */
        @Override
        public boolean matches(IToken token) {
            return token.getType() == IDENTIFIER && token.getText().toLowerCase(Locale.ROOT).startsWith(prefix);
        }
    }
}