001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.plsql;
018
019import static eu.cqse.check.framework.scanner.ETokenType.*;
020import static eu.cqse.check.framework.shallowparser.SubTypeNames.IFS_SEARCH_BLOCK;
021import static eu.cqse.check.framework.shallowparser.SubTypeNames.PREPROCESSOR_CONDITION;
022import static eu.cqse.check.framework.shallowparser.languages.plsql.PlsqlShallowParser.EPlsqlParserStates.DECLARATIONS;
023import static eu.cqse.check.framework.shallowparser.languages.plsql.PlsqlShallowParser.EPlsqlParserStates.STATEMENTS;
024
025import java.util.Arrays;
026import java.util.EnumSet;
027
028import org.conqat.lib.commons.region.Region;
029
030import eu.cqse.check.framework.scanner.ETokenType;
031import eu.cqse.check.framework.scanner.IToken;
032import eu.cqse.check.framework.shallowparser.SubTypeNames;
033import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
034import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
035import eu.cqse.check.framework.shallowparser.framework.SequenceRecognizer.ITokenMatcher;
036import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
037
038/**
039 * Shallow parser for PL/SQL.
040 */
041public class PlsqlShallowParser extends ShallowParserBase<PlsqlShallowParser.EPlsqlParserStates> {
042
043        /** The states used in this parser. */
044        public static enum EPlsqlParserStates {
045
046        /** A state to recognize declarations. */
047        DECLARATIONS,
048
049        /** A state to recognize statements. */
050        STATEMENTS
051        }
052
053        /**
054         * In PL/SQL all keywords may also be used as identifiers. There are also
055         * reserved words, which are not allowed as identifiers.
056         */
057        private static final EnumSet<ETokenType> PLSQL_IDENTIFIERS = EnumSet.of(IDENTIFIER, A, ADD, AGENT, AGGREGATE, ARRAY,
058                        ATTRIBUTE, AUTHID, AVG, BFILE_BASE, BINARY, BLOB_BASE, BLOCK, BODY, BOTH, BOUND, BULK, BYTE, C, CALL,
059                        CALLING, CASCADE, CHAR, CHAR_BASE, CHARACTER, CHARSETFORM, CHARSETID, CHARSET, CLOB_BASE, CLOSE, COLLECT,
060                        COMMENT, COMMIT, COMMITTED, COMPILED, CONSTANT, CONSTRUCTOR, CONTEXT, CONTINUE, CONVERT, COUNT, CURSOR,
061                        CUSTOMDATUM, DANGLING, DATA, DATE, DATE_BASE, DAY, DEFINE, DETERMINISTIC, DOUBLE, DURATION, ELEMENT, ELSIF,
062                        EMPTY, ESCAPE, EXCEPT, EXCEPTIONS, EXECUTE, EXIT, EXTERNAL, FINAL, FIXED, FLOAT, FORALL, FORCE, FUNCTION,
063                        GENERAL, HASH, HEAP, HIDDEN, HOUR, IMMEDIATE, INCLUDING, INDICATOR, INDICES, INFINITE, INSTANTIABLE, INT,
064                        INTERFACE, INTERVAL, INVALIDATE, ISOLATION, JAVA, LANGUAGE, LARGE, LEADING, LENGTH, LEVEL, LIBRARY, LIKE2,
065                        LIKE4, LIKEC, LIMIT, LIMITED, LOCAL, LONG, LOOP, MAP, MAX, MAXLEN, MEMBER, MERGE, MIN, MINUTE, MOD, MODIFY,
066                        MONTH, MULTISET, NAME, NAN, NATIONAL, NATIVE, NCHAR, NEW, NOCOPY, NUMBER_BASE, OBJECT, OCICOLL, OCIDATETIME,
067                        OCIDATE, OCIDURATION, OCIINTERVAL, OCILOBLOCATOR, OCINUMBER, OCIRAW, OCIREFCURSOR, OCIREF, OCIROWID,
068                        OCISTRING, OCITYPE, ONLY, OPAQUE, OPEN, OPERATOR, ORACLE, ORADATA, ORGANIZATION, ORLANY, ORLVARY, OTHERS,
069                        OUT, OVERRIDING, PACKAGE, PARALLEL_ENABLE, PARAMETER, PARAMETERS, PARTITION, PASCAL, PIPE, PIPELINED,
070                        PRAGMA, PRECISION, PRIVATE, RAISE, RANGE, RAW, READ, RECORD, REF, REFERENCE, RELIES_ON, REM, REMAINDER,
071                        RENAME, REPLACE, RESULT, RESULT_CACHE, RETURN, RETURNING, REVERSE, ROLLBACK, ROW, SAMPLE, SAVE, SAVEPOINT,
072                        SB1, SB2, SB4, SECOND, SEGMENT, SELF, SEPARATE, SEQUENCE, SERIALIZABLE, SET, SHORT, SIZE_T, SOME, SPARSE,
073                        SQLCODE, SQLDATA, SQLNAME, SQLSTATE, STANDARD, STATIC, STDDEV, STORED, STRING, STRUCT, STYLE, SUBMULTISET,
074                        SUBPARTITION, SUBSTITUTABLE, SUBTYPE, SUM, SYNONYM, TDO, THE, TIME, TIMESTAMP, TIMEZONE_ABBR, TIMEZONE_HOUR,
075                        TIMEZONE_MINUTE, TIMEZONE_REGION, TRAILING, TRANSACTION, TRANSACTIONAL, TRUSTED, TYPE, UB1, UB2, UB4, UNDER,
076                        UNSIGNED, UNTRUSTED, USE, USING, VALIST, VALUE, VARIABLE, VARIANCE, VARRAY, VARYING, VOID, WHILE, WORK,
077                        WRAPPED, WRITE, YEAR, ZONE,
078                        // these are not "official" keywords, but may be used as identifiers
079                        // as well
080                        DELETE, ON, OFF);
081
082        /** Constructor. */
083        public PlsqlShallowParser() {
084                super(EPlsqlParserStates.class, DECLARATIONS);
085
086                createIfsPreprocessorRules(DECLARATIONS);
087                createIfsPreprocessorRules(STATEMENTS);
088                createMetaRules();
089                createPackageAndTypeRules();
090                createMethodAndAttributeRules();
091                createStatementRules();
092        }
093
094        /**
095         * Create rules for IFS preprocessor directives. Since these are interpreted by
096         * the preprocessor, they do not change the parser state.
097         */
098        private void createIfsPreprocessorRules(EPlsqlParserStates state) {
099                RecognizerBase<EPlsqlParserStates> ifAlternative = inState(state).sequence(IFS_IF)
100                                .createNode(EShallowEntityType.META, PREPROCESSOR_CONDITION, -1).skipTo(IFS_THEN).parseUntil(state)
101                                .sequenceBefore(EnumSet.of(IFS_ELSE, IFS_END));
102                ifAlternative.sequence(IFS_END).endNode();
103                ifAlternative.endNodeWithContinuation();
104
105                inState(state).sequence(IFS_ELSE).createNode(EShallowEntityType.META, PREPROCESSOR_CONDITION, -1)
106                                .parseUntil(state).sequence(IFS_END).endNode();
107
108                inState(state).sequence(IFS_PREPEND).createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1)
109                                .parseStrictlyUntil(state).sequenceBefore(IFS_SEARCH).endNodeWithContinuation();
110
111                RecognizerBase<EPlsqlParserStates> searchAlternative = inState(state).sequence(IFS_SEARCH)
112                                .createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1)
113                                .skipBeforeWithNesting(EnumSet.of(IFS_END, IFS_APPEND, IFS_REPLACE), IFS_IF, IFS_END);
114                searchAlternative.sequence(IFS_END).endNode();
115                searchAlternative.endNodeWithContinuation();
116
117                inState(state).sequence(EnumSet.of(IFS_APPEND, IFS_REPLACE))
118                                .createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1).parseStrictlyUntil(state)
119                                .skipToWithNesting(IFS_END, IFS_IF, IFS_END).endNode();
120
121                inState(state).sequence(IFS_TEXTPREPEND).createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1)
122                                .parseStrictlyUntil(state).sequenceBefore(IFS_TEXTSEARCH).endNodeWithContinuation();
123
124                RecognizerBase<EPlsqlParserStates> textsearchAlternative = inState(state).sequence(IFS_TEXTSEARCH)
125                                .createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1)
126                                .skipBefore(EnumSet.of(IFS_TEXTEND, IFS_TEXTAPPEND, IFS_TEXTREPLACE));
127                textsearchAlternative.sequence(IFS_TEXTEND).endNode();
128                textsearchAlternative.endNodeWithContinuation();
129
130                inState(state).sequence(EnumSet.of(IFS_TEXTAPPEND, IFS_TEXTREPLACE))
131                                .createNode(EShallowEntityType.META, IFS_SEARCH_BLOCK, -1).parseStrictlyUntil(state)
132                                .sequence(IFS_TEXTEND).endNode();
133        }
134
135        /** Create rules for parsing meta elements. */
136        private void createMetaRules() {
137
138                // SHOW ERROR is tricky, as it allows for multiple abbreviations
139                inState(DECLARATIONS).sequence(new IdentifierPrefixMatcher("sho"), new IdentifierPrefixMatcher("err"))
140                                .optional(SEMICOLON).createNode(EShallowEntityType.META, "show errors").endNode();
141
142                // exit
143                inState(DECLARATIONS).sequence(EXIT).optional(INTEGER_LITERAL).optional(SEMICOLON)
144                                .createNode(EShallowEntityType.META, "exit").endNode();
145
146                // pragma
147                inAnyState().sequence(PRAGMA).createNode(EShallowEntityType.META, "pragma").skipTo(SEMICOLON).endNode();
148
149                // exception section
150                inState(STATEMENTS).sequence(EXCEPTION).createNode(EShallowEntityType.META, "exception section").endNode();
151
152                // IFS Override Annotation for Procedures
153                inState(DECLARATIONS).sequence(OVERRIDE).createNode(EShallowEntityType.META, -1).endNode();
154                inState(DECLARATIONS).sequence(OVERTAKE).createNode(EShallowEntityType.META, "@overtake", -1).endNode();
155                inState(DECLARATIONS).sequence(ANNOTATION).createNode(EShallowEntityType.META, "annotation", -1).endNode();
156
157                // single and double 'at' sign execution; see
158                // http://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm#BACIEHDJ
159                inState(DECLARATIONS).sequence(AT).optional(AT).createNode(EShallowEntityType.META, "run script")
160                                .repeated(EnumSet.of(IDENTIFIER, DOT, MINUS, MOD)).endNode();
161
162                // SQL statements
163                inAnyState()
164                                .sequence(EnumSet.of(AGGREGATE, ALTER, COMMIT, DELETE, GRANT, INSERT, LOCK, ROLLBACK, SAVEPOINT, SELECT,
165                                                DROP, MERGE, UPDATE))
166                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.SQL, 0).skipTo(SEMICOLON).endNode();
167                inAnyState().sequence(SET, TRANSACTION).createNode(EShallowEntityType.STATEMENT, SubTypeNames.SQL, 0)
168                                .skipTo(SEMICOLON).endNode();
169                inAnyState().sequence(CREATE).optional(OR, REPLACE).optional(PUBLIC).sequence(SYNONYM)
170                                .createNode(EShallowEntityType.STATEMENT, SubTypeNames.SQL, 0).skipTo(SEMICOLON).endNode();
171
172                // set
173                EnumSet<ETokenType> setIdentifiers = EnumSet.of(IDENTIFIER, DEFINE);
174                inState(DECLARATIONS).sequence(SET, setIdentifiers, PLSQL_IDENTIFIERS)
175                                .createNode(EShallowEntityType.META, "set").repeated(setIdentifiers, PLSQL_IDENTIFIERS)
176                                .optional(SEMICOLON).endNode();
177
178                // deal with dangling end by inserting broken node
179                inAnyState().sequence(END).createNode(EShallowEntityType.META, "dangling end").skipTo(SEMICOLON); // endNode()
180                                                                                                                                                                                                                        // omitted!
181        }
182
183        /** Creates parsing rules for packages and types. */
184        private void createPackageAndTypeRules() {
185
186                // packages
187                RecognizerBase<EPlsqlParserStates> optionalBeginAlternative = createOrReplace().sequence(PACKAGE).optional(BODY)
188                                .markStart().repeated(PLSQL_IDENTIFIERS, DOT).sequence(PLSQL_IDENTIFIERS)
189                                .createNode(EShallowEntityType.MODULE, "package", new Region(0, -1)).skipTo(EnumSet.of(IS, AS))
190                                .parseUntil(DECLARATIONS);
191                optionalBeginAlternative.sequence(BEGIN).parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
192                optionalBeginAlternative.sequence(END).skipTo(SEMICOLON).endNode();
193
194                // type body
195                createOrReplace().sequence(TYPE, BODY).markStart().repeated(PLSQL_IDENTIFIERS, DOT).sequence(PLSQL_IDENTIFIERS)
196                                .createNode(EShallowEntityType.MODULE, "type body", new Region(0, -1)).skipTo(EnumSet.of(IS, AS))
197                                .parseUntil(DECLARATIONS).sequence(END).skipTo(SEMICOLON).endNode();
198
199                // type
200                RecognizerBase<EPlsqlParserStates> typeMatcher = createOrReplace().markStart().sequence(TYPE)
201                                .repeated(PLSQL_IDENTIFIERS, DOT).sequence(PLSQL_IDENTIFIERS)
202                                .createNode(EShallowEntityType.TYPE, 0, new Region(1, -1))
203                                .skipBefore(EnumSet.of(IS, AS, UNDER, SEMICOLON, DIV));
204                typeMatcher.sequence(EnumSet.of(SEMICOLON, DIV)).endNode();
205                RecognizerBase<EPlsqlParserStates> typeMatcher2 = typeMatcher.skipBefore(EnumSet.of(IS, AS, UNDER))
206                                .optional(EnumSet.of(IS, AS));
207                typeMatcher2.sequence(EnumSet.of(OBJECT, UNDER)).skipTo(LPAREN).parseUntil(DECLARATIONS)
208                                // closing paren is swallowed by decl rules
209                                .repeated(EnumSet.of(NOT, FINAL, INSTANTIABLE)).sequence(EnumSet.of(SEMICOLON, DIV)).endNode();
210                typeMatcher2.skipTo(EnumSet.of(SEMICOLON, DIV)).endNode();
211
212                // top-level code block
213                inState(DECLARATIONS).sequence(BEGIN).createNode(EShallowEntityType.METHOD, "top-level code")
214                                .parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
215        }
216
217        /** Create parser rules for functions, procedures, constructors, etc. */
218        private void createMethodAndAttributeRules() {
219
220                // function/procedure
221                RecognizerBase<EPlsqlParserStates> methodStart = createOrReplace()
222                                .repeated(EnumSet.of(MAP, NOT, OVERRIDING, ORDER, FINAL, INSTANTIABLE, MEMBER, STATIC, CONSTRUCTOR))
223                                .markStart().sequence(EnumSet.of(PROCEDURE, FUNCTION)).repeated(PLSQL_IDENTIFIERS, DOT)
224                                .sequence(PLSQL_IDENTIFIERS).createNode(EShallowEntityType.METHOD, 0, new Region(1, -1))
225                                .skipNested(LPAREN, RPAREN).optional(RETURN, SELF, AS, RESULT)
226                                .skipBefore(EnumSet.of(SEMICOLON, IS, AS, RPAREN, COMMA));
227                methodStart.sequence(EnumSet.of(SEMICOLON, RPAREN, COMMA)).endNode();
228
229                RecognizerBase<EPlsqlParserStates> methodStart2 = methodStart.sequence(EnumSet.of(IS, AS));
230                methodStart2.sequence(EnumSet.of(LANGUAGE, EXTERNAL)).skipTo(SEMICOLON).endNode();
231                methodStart2.parseUntil(DECLARATIONS).sequence(BEGIN).parseUntil(STATEMENTS).sequence(END)
232                                .skipToWithNesting(EnumSet.of(SEMICOLON, RPAREN, COMMA), LPAREN, RPAREN).endNode();
233
234                // view
235                createOrReplace().markStart().sequence(VIEW, PLSQL_IDENTIFIERS, AS)
236                                .createNode(EShallowEntityType.ATTRIBUTE, 0, new Region(1, -2)).skipTo(SEMICOLON).endNode();
237
238                // exception declaration
239                inState(DECLARATIONS).sequence(PLSQL_IDENTIFIERS, EXCEPTION, SEMICOLON)
240                                .createNode(EShallowEntityType.META, "exception declaration", 0).endNode();
241
242                // trigger
243                RecognizerBase<EPlsqlParserStates> triggerMatch = createOrReplace().markStart().sequence(TRIGGER)
244                                .repeated(PLSQL_IDENTIFIERS, DOT).sequence(PLSQL_IDENTIFIERS)
245                                .createNode(EShallowEntityType.METHOD, 0, new Region(1, -1))
246                                .skipBefore(EnumSet.of(SEMICOLON, DECLARE, BEGIN));
247                triggerMatch.sequence(SEMICOLON).endNode();
248                triggerMatch.sequence(BEGIN).parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
249                triggerMatch.sequence(DECLARE).parseUntil(DECLARATIONS).sequence(BEGIN).parseUntil(STATEMENTS).sequence(END)
250                                .skipTo(SEMICOLON).endNode();
251
252                // cursor declaration
253                inState(DECLARATIONS).sequence(CURSOR).skipTo(SEMICOLON)
254                                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.CURSOR, 1).endNode();
255
256                // variables and constants
257                inState(DECLARATIONS).sequence(PLSQL_IDENTIFIERS).createNode(EShallowEntityType.ATTRIBUTE, "variable", 0)
258                                .skipToWithNesting(EnumSet.of(SEMICOLON, RPAREN, COMMA), LPAREN, RPAREN).endNode();
259        }
260
261        /** Matches the optional CREATE OR REPLACE clause. */
262        private RecognizerBase<EPlsqlParserStates> createOrReplace() {
263                return inState(DECLARATIONS).optional(CREATE).optional(OR, REPLACE);
264        }
265
266        /** Creates parser rules for statements. */
267        private void createStatementRules() {
268                // if/elseif
269                RecognizerBase<EPlsqlParserStates> ifAlternative = inState(STATEMENTS).sequence(EnumSet.of(IF, ELSIF))
270                                .createNode(EShallowEntityType.STATEMENT, 0).skipToWithNesting(THEN, CASE, END).parseUntil(STATEMENTS)
271                                .sequenceBefore(EnumSet.of(ELSIF, ELSE, END));
272                ifAlternative.sequence(END, IF, SEMICOLON).endNode();
273                ifAlternative.endNodeWithContinuation();
274
275                // else (both for if and case)
276                RecognizerBase<EPlsqlParserStates> elseMatcher = inState(STATEMENTS).sequence(ELSE)
277                                .createNode(EShallowEntityType.STATEMENT, 0).parseUntil(STATEMENTS);
278                elseMatcher.sequence(END, IF).skipTo(SEMICOLON).endNode();
279                elseMatcher.sequenceBefore(END, CASE).endNode();
280
281                // loops
282                inState(STATEMENTS).sequence(LOOP).createNode(EShallowEntityType.STATEMENT, 0).parseUntil(STATEMENTS)
283                                .sequence(END, LOOP).skipTo(SEMICOLON).endNode();
284
285                inState(STATEMENTS).sequence(EnumSet.of(WHILE, FOR)).createNode(EShallowEntityType.STATEMENT, 0).skipTo(LOOP)
286                                .parseUntil(STATEMENTS).sequence(END, LOOP).skipTo(SEMICOLON).endNode();
287
288                // blocks
289                inState(STATEMENTS).sequence(DECLARE).createNode(EShallowEntityType.STATEMENT, "block").parseUntil(DECLARATIONS)
290                                .sequence(BEGIN).parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
291                inState(STATEMENTS).sequence(BEGIN).createNode(EShallowEntityType.STATEMENT, "block").parseUntil(STATEMENTS)
292                                .sequence(END).skipTo(SEMICOLON).endNode();
293
294                // case
295                inState(STATEMENTS).sequence(CASE).createNode(EShallowEntityType.STATEMENT, 0)
296                                .skipBefore(EnumSet.of(WHEN, ELSE)).parseUntil(STATEMENTS).sequence(END, CASE).skipTo(SEMICOLON)
297                                .endNode();
298
299                // when (in exceptions or case)
300                inState(STATEMENTS).sequence(WHEN).skipTo(THEN).createNode(EShallowEntityType.META, "when", 1).endNode();
301
302                // labels
303                inState(STATEMENTS).sequence(LEFT_LABEL_BRACKET, PLSQL_IDENTIFIERS, RIGHT_LABEL_BRACKET)
304                                .createNode(EShallowEntityType.META, SubTypeNames.LABEL, 1).endNode();
305
306                // basic statement
307                EnumSet<ETokenType> basicStatementStarts = EnumSet.copyOf(PLSQL_IDENTIFIERS);
308                basicStatementStarts.addAll(Arrays.asList(SELF, RETURN, GOTO, FETCH, NULL_LITERAL));
309                inState(STATEMENTS).sequence(basicStatementStarts).createNode(EShallowEntityType.STATEMENT, 0).skipTo(SEMICOLON)
310                                .endNode();
311        }
312
313        /** Matcher for identifiers by a prefix. */
314        private static class IdentifierPrefixMatcher implements ITokenMatcher {
315
316                /** The prefix. */
317                private final String prefix;
318
319                /** Constructor. */
320                public IdentifierPrefixMatcher(String prefix) {
321                        this.prefix = prefix.toLowerCase();
322                }
323
324                /** {@inheritDoc} */
325                @Override
326                public boolean matches(IToken token) {
327                        return token.getType() == IDENTIFIER && token.getText().toLowerCase().startsWith(prefix);
328                }
329        }
330}