001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.ada;
018
019import static eu.cqse.check.framework.scanner.ETokenType.ABORT;
020import static eu.cqse.check.framework.scanner.ETokenType.ABSTRACT;
021import static eu.cqse.check.framework.scanner.ETokenType.ACCEPT;
022import static eu.cqse.check.framework.scanner.ETokenType.AND;
023import static eu.cqse.check.framework.scanner.ETokenType.BEGIN;
024import static eu.cqse.check.framework.scanner.ETokenType.BODY;
025import static eu.cqse.check.framework.scanner.ETokenType.CASE;
026import static eu.cqse.check.framework.scanner.ETokenType.COLON;
027import static eu.cqse.check.framework.scanner.ETokenType.DECLARE;
028import static eu.cqse.check.framework.scanner.ETokenType.DELAY;
029import static eu.cqse.check.framework.scanner.ETokenType.DO;
030import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
031import static eu.cqse.check.framework.scanner.ETokenType.ELSEIF;
032import static eu.cqse.check.framework.scanner.ETokenType.END;
033import static eu.cqse.check.framework.scanner.ETokenType.ENDRECORD;
034import static eu.cqse.check.framework.scanner.ETokenType.ENTRY;
035import static eu.cqse.check.framework.scanner.ETokenType.EQ;
036import static eu.cqse.check.framework.scanner.ETokenType.EXCEPTION;
037import static eu.cqse.check.framework.scanner.ETokenType.EXIT;
038import static eu.cqse.check.framework.scanner.ETokenType.FOR;
039import static eu.cqse.check.framework.scanner.ETokenType.FUNCTION;
040import static eu.cqse.check.framework.scanner.ETokenType.GENERIC;
041import static eu.cqse.check.framework.scanner.ETokenType.GOTO;
042import static eu.cqse.check.framework.scanner.ETokenType.GT;
043import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
044import static eu.cqse.check.framework.scanner.ETokenType.IF;
045import static eu.cqse.check.framework.scanner.ETokenType.IS;
046import static eu.cqse.check.framework.scanner.ETokenType.LEFT_LABEL_BRACKET;
047import static eu.cqse.check.framework.scanner.ETokenType.LOOP;
048import static eu.cqse.check.framework.scanner.ETokenType.NEW;
049import static eu.cqse.check.framework.scanner.ETokenType.NULL;
050import static eu.cqse.check.framework.scanner.ETokenType.OR;
051import static eu.cqse.check.framework.scanner.ETokenType.PACKAGE;
052import static eu.cqse.check.framework.scanner.ETokenType.PRAGMA;
053import static eu.cqse.check.framework.scanner.ETokenType.PREPROCESSOR_DIRECTIVE;
054import static eu.cqse.check.framework.scanner.ETokenType.PROCEDURE;
055import static eu.cqse.check.framework.scanner.ETokenType.PROTECTED;
056import static eu.cqse.check.framework.scanner.ETokenType.RAISE;
057import static eu.cqse.check.framework.scanner.ETokenType.RECORD;
058import static eu.cqse.check.framework.scanner.ETokenType.RENAMES;
059import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
060import static eu.cqse.check.framework.scanner.ETokenType.RIGHT_LABEL_BRACKET;
061import static eu.cqse.check.framework.scanner.ETokenType.SELECT;
062import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
063import static eu.cqse.check.framework.scanner.ETokenType.SEPARATE;
064import static eu.cqse.check.framework.scanner.ETokenType.STRING_LITERAL;
065import static eu.cqse.check.framework.scanner.ETokenType.SUBTYPE;
066import static eu.cqse.check.framework.scanner.ETokenType.TASK;
067import static eu.cqse.check.framework.scanner.ETokenType.TERMINATE;
068import static eu.cqse.check.framework.scanner.ETokenType.THEN;
069import static eu.cqse.check.framework.scanner.ETokenType.TYPE;
070import static eu.cqse.check.framework.scanner.ETokenType.USE;
071import static eu.cqse.check.framework.scanner.ETokenType.WHEN;
072import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
073import static eu.cqse.check.framework.scanner.ETokenType.WITH;
074import static eu.cqse.check.framework.shallowparser.languages.ada.AdaShallowParser.EAdaParserStates.DECLARATIONS;
075import static eu.cqse.check.framework.shallowparser.languages.ada.AdaShallowParser.EAdaParserStates.STATEMENTS;
076
077import java.util.EnumSet;
078
079import eu.cqse.check.framework.scanner.ETokenType;
080import eu.cqse.check.framework.scanner.IToken;
081import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
082import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
083import eu.cqse.check.framework.shallowparser.framework.ShallowParserBase;
084
085/**
086 * Shallow parser for Ada.
087 * <p>
088 * A good introduction to Ada can be found here:
089 * http://en.wikibooks.org/wiki/Ada_Programming
090 * <p>
091 * A reference is available here:
092 * http://www.adaic.org/resources/add_content/standards/05rm/html/RM-TOC.html
093 * <p>
094 * What this parser does and does not:
095 * <ul>
096 * <li>The "with" and "use" statements as well as pragmas are parsed as meta
097 * data.</li>
098 * <li>Packages, types, functions, procedures, entries are parsed as
099 * expected.</li>
100 * <li>It recognizes the nesting of statements (e.g. in loops), but does not
101 * parse into the statements. For example, it recognizes an if-statement and
102 * provides the list of sub-statements, but does not provide direct access to
103 * the if-condition.</li>
104 * <li>The parser does not support non-ASCII characters in identifiers, although
105 * Ada95 supports unicode here. Actually, this is not an issue of the parser but
106 * the underlying scanner.</li>
107 * </ul>
108 * <p>
109 * Implementation hints:
110 * <ul>
111 * <li>Several rules are registered for any state although they would be
112 * expected to occur only in state DECLARATIONS. This is to allow the parser to
113 * recover from state STATEMENTS, if an arbitrary chunk of Ada code is to be
114 * parsed.</li>
115 * </ul>
116 */
117public class AdaShallowParser extends ShallowParserBase<AdaShallowParser.EAdaParserStates> {
118
119        /** The states used in this parser. */
120        public static enum EAdaParserStates {
121
122                /** A state to recognize declarations. */
123                DECLARATIONS,
124
125                /** A state to recognize statements. */
126                STATEMENTS
127        }
128
129        /** Constructor. */
130        public AdaShallowParser() {
131                super(EAdaParserStates.class, DECLARATIONS);
132
133                createMetaRules();
134                createSpecificationRules();
135                createBodyAndTypeRules();
136                createMethodAndAttributeRules();
137                createTypeRules();
138                createStatementRules();
139        }
140
141        /** Create rules for parsing meta elements. */
142        private void createMetaRules() {
143                // parse generic prefix as meta
144                // we skip from "with" to ";" to not stumble over subprogram parameters
145                inAnyState().sequence(GENERIC).createNode(EShallowEntityType.META, 0)
146                                .skipBeforeWithNesting(EnumSet.of(PACKAGE, PROCEDURE, FUNCTION, ENTRY), WITH, SEMICOLON)
147                                .endNodeWithContinuation();
148
149                // parse use and with as meta
150                inAnyState().sequence(EnumSet.of(WITH, USE, PRAGMA)).createNode(EShallowEntityType.META, 0).skipTo(SEMICOLON)
151                                .endNode();
152
153                // parse pragma as meta
154                inAnyState().sequence(PREPROCESSOR_DIRECTIVE).createNode(EShallowEntityType.META, "pragma").endNode();
155
156                // deal with dangling end by inserting broken node
157                inAnyState().sequence(END).createNode(EShallowEntityType.META, "dangling end").skipTo(SEMICOLON); // endNode()
158                                                                                                                                                                                                                        // omitted!
159        }
160
161        /** Creates parsing rules for package and task specifications. */
162        private void createSpecificationRules() {
163                // package specification
164                RecognizerBase<EAdaParserStates> packageSpecAlternative = inAnyState().sequence(PACKAGE, IDENTIFIER)
165                                .skipBefore(EnumSet.of(SEMICOLON, IS, RENAMES));
166                packageSpecAlternative.sequence(SEMICOLON).createNode(EShallowEntityType.MODULE, "package specification", -2)
167                                .endNode();
168                packageSpecAlternative.sequence(RENAMES).createNode(EShallowEntityType.MODULE, "package renaming", -2)
169                                .skipTo(SEMICOLON).endNode();
170                packageSpecAlternative.sequence(IS, NEW)
171                                .createNode(EShallowEntityType.TYPE, "generic package instantiation", -3).skipTo(SEMICOLON).endNode();
172                packageSpecAlternative.sequence(IS).createNode(EShallowEntityType.MODULE, "package specification", -2)
173                                .parseUntil(DECLARATIONS).sequence(END).skipTo(SEMICOLON).endNode();
174
175                // task specification
176                RecognizerBase<EAdaParserStates> taskSpecAlternative = inAnyState()
177                                .sequence(EnumSet.of(TASK, PROTECTED), IDENTIFIER)
178                                .createNode(EShallowEntityType.MODULE, new Object[] { 0, "specification" }, -1)
179                                .skipBefore(EnumSet.of(SEMICOLON, IS));
180                taskSpecAlternative.sequence(SEMICOLON).endNode();
181                taskSpecAlternative.sequence(IS).parseUntil(DECLARATIONS).sequence(END).skipTo(SEMICOLON).endNode();
182        }
183
184        /**
185         * Creates parsing rules for package/task/protected body and task/protected
186         * type.
187         */
188        private void createBodyAndTypeRules() {
189                // package body, task body, protected body
190                RecognizerBase<EAdaParserStates> packageBodyAlternative1 = inAnyState()
191                                .sequence(EnumSet.of(PACKAGE, TASK, PROTECTED), BODY, IDENTIFIER).skipTo(IS)
192                                .createNode(EShallowEntityType.MODULE, new int[] { 0, 1 }, -2);
193                packageBodyAlternative1.sequence(SEPARATE, SEMICOLON).endNode();
194                RecognizerBase<EAdaParserStates> packageBodyAlternative2 = packageBodyAlternative1.parseUntil(DECLARATIONS);
195                packageBodyAlternative2.sequence(END).skipTo(SEMICOLON).endNode();
196                completeBlock(packageBodyAlternative2.sequence(BEGIN));
197
198                // task types and protected types
199                RecognizerBase<EAdaParserStates> taskTypeAlternative = inAnyState()
200                                .sequence(EnumSet.of(TASK, PROTECTED), TYPE, IDENTIFIER)
201                                .createNode(EShallowEntityType.MODULE, new int[] { 0, 1 }).skipBefore(EnumSet.of(SEMICOLON, IS));
202                taskTypeAlternative.sequence(SEMICOLON).endNode();
203                taskTypeAlternative.sequence(IS).parseUntil(DECLARATIONS).sequence(END).skipTo(SEMICOLON).endNode();
204
205                // new...with is skipped
206                inState(DECLARATIONS).sequence(NEW).skipTo(WITH);
207        }
208
209        /**
210         * Creates rules for parsing methods (functions, etc.) and attributes (and
211         * local variables).
212         */
213        private void createMethodAndAttributeRules() {
214                // functions, procedures (including operator overloading), entries
215                RecognizerBase<EAdaParserStates> functionAlternative = inAnyState()
216                                .sequence(EnumSet.of(PROCEDURE, FUNCTION, ENTRY), EnumSet.of(IDENTIFIER, STRING_LITERAL))
217                                .createNode(EShallowEntityType.METHOD, 0, 1).skipBefore(EnumSet.of(SEMICOLON, IS));
218                functionAlternative.sequence(SEMICOLON).endNode();
219                functionAlternative.sequence(IS, EnumSet.of(SEPARATE, ABSTRACT, NEW)).skipTo(SEMICOLON).endNode();
220                completeBlock(functionAlternative.sequence(IS).parseUntil(DECLARATIONS).sequence(BEGIN));
221
222                // variables and constants
223                inState(DECLARATIONS).sequence(IDENTIFIER, COLON).createNode(EShallowEntityType.ATTRIBUTE, "variable", 0)
224                                .skipTo(SEMICOLON).endNode();
225        }
226
227        /** Creates rules for parsing types and similar constructs. */
228        private void createTypeRules() {
229                // types/subtypes
230                RecognizerBase<EAdaParserStates> typeAlternative = inAnyState().sequence(EnumSet.of(TYPE, SUBTYPE), IDENTIFIER)
231                                .createNode(EShallowEntityType.TYPE, 0, 1).skipBefore(EnumSet.of(SEMICOLON, IS));
232                typeAlternative.sequence(SEMICOLON).endNode();
233                typeAlternative.sequence(IS, NULL, RECORD, SEMICOLON).endNode();
234
235                RecognizerBase<EAdaParserStates> typeAlternative2 = typeAlternative.sequence(IS)
236                                .skipBefore(EnumSet.of(SEMICOLON, RECORD, NULL));
237                typeAlternative2.sequence(SEMICOLON).endNode();
238                typeAlternative2.sequence(NULL).skipTo(SEMICOLON).endNode();
239                typeAlternative2.sequence(RECORD).skipTo(END, ENDRECORD, SEMICOLON).endNode();
240
241                // representation clauses
242                RecognizerBase<EAdaParserStates> overlayAlternative = inState(DECLARATIONS).sequence(FOR)
243                                .createNode(EShallowEntityType.TYPE, "representation clause").skipBefore(EnumSet.of(SEMICOLON, RECORD));
244                overlayAlternative.sequence(SEMICOLON).endNode();
245                overlayAlternative.sequence(RECORD).skipTo(END, ENDRECORD, SEMICOLON).endNode();
246        }
247
248        /** Creates the rules needed for parsing statements. */
249        private void createStatementRules() {
250                // if/elseif
251                RecognizerBase<EAdaParserStates> ifAlternative = inState(STATEMENTS).sequence(EnumSet.of(IF, ELSEIF))
252                                .createNode(EShallowEntityType.STATEMENT, 0).skipTo(THEN).parseUntil(STATEMENTS)
253                                .sequenceBefore(EnumSet.of(ELSEIF, ELSE, END));
254                ifAlternative.sequence(END).skipTo(SEMICOLON).endNode();
255                ifAlternative.endNodeWithContinuation();
256
257                // else (both for if and select)
258                inState(STATEMENTS).sequence(ELSE).createNode(EShallowEntityType.STATEMENT, 0).parseUntil(STATEMENTS)
259                                .sequence(END).skipTo(SEMICOLON).endNode();
260
261                // case
262                inState(STATEMENTS).sequence(CASE).createNode(EShallowEntityType.STATEMENT, 0).skipTo(IS).parseUntil(STATEMENTS)
263                                .sequence(END).skipTo(SEMICOLON).endNode();
264
265                // when (in case, select, and exception handlers)
266                inState(STATEMENTS).sequence(WHEN).createNode(EShallowEntityType.STATEMENT, 0).skipTo(EQ, GT).endNode();
267
268                // loop/block labels as meta
269                inState(STATEMENTS).sequence(IDENTIFIER, COLON).sequenceBefore(EnumSet.of(WHILE, FOR, LOOP, BEGIN))
270                                .createNode(EShallowEntityType.META, "Loop name").endNode();
271
272                // loops
273                inState(STATEMENTS).sequence(LOOP).createNode(EShallowEntityType.STATEMENT, 0).parseUntil(STATEMENTS)
274                                .sequence(END).skipTo(SEMICOLON).endNode();
275                inState(STATEMENTS).sequence(EnumSet.of(WHILE, FOR)).createNode(EShallowEntityType.STATEMENT, 0).skipTo(LOOP)
276                                .parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
277
278                // blocks
279                completeBlock(inState(STATEMENTS).sequence(IDENTIFIER, COLON, DECLARE)
280                                .createNode(EShallowEntityType.STATEMENT, "block").parseUntil(DECLARATIONS).sequence(BEGIN));
281                completeBlock(inState(STATEMENTS).sequence(DECLARE).createNode(EShallowEntityType.STATEMENT, "block")
282                                .parseUntil(DECLARATIONS).sequence(BEGIN));
283                completeBlock(inState(STATEMENTS).sequence(BEGIN).createNode(EShallowEntityType.STATEMENT, "block"));
284
285                // accept/do
286                inState(STATEMENTS).sequence(ACCEPT).createNode(EShallowEntityType.STATEMENT, 0).skipTo(DO)
287                                .parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
288
289                // select/or
290                RecognizerBase<EAdaParserStates> selectAlternative = inState(STATEMENTS).sequence(EnumSet.of(SELECT, OR, ELSE))
291                                .createNode(EShallowEntityType.STATEMENT, 0).parseUntil(STATEMENTS);
292                selectAlternative.sequenceBefore(EnumSet.of(OR, ELSE)).endNodeWithContinuation();
293                selectAlternative.sequence(END).skipTo(SEMICOLON).endNode();
294
295                // goto labels as meta
296                inState(STATEMENTS).sequence(LEFT_LABEL_BRACKET).skipTo(RIGHT_LABEL_BRACKET)
297                                .createNode(EShallowEntityType.META, "goto label").endNode();
298
299                // basic statement
300                inState(STATEMENTS).sequence(EnumSet.of(IDENTIFIER, NULL, RETURN, GOTO, EXIT, ABORT, DELAY, RAISE, TERMINATE))
301                                .createNode(EShallowEntityType.STATEMENT, 0).skipTo(SEMICOLON).endNode();
302        }
303
304        /** Completes the rule describing blocks. */
305        private static void completeBlock(RecognizerBase<EAdaParserStates> initialSequence) {
306                RecognizerBase<EAdaParserStates> alt = initialSequence.parseUntil(STATEMENTS);
307                alt.sequence(EXCEPTION).parseUntil(STATEMENTS).sequence(END).skipTo(SEMICOLON).endNode();
308                alt.sequence(END).skipTo(SEMICOLON).endNode();
309        }
310
311        /**
312         * {@inheritDoc}
313         * <p>
314         * Maps "and then" and "or else" to simple "and" and "or", as the additional
315         * "then" and "else" keywords may shallow parsing much harder.
316         */
317        @Override
318        protected boolean isFilteredToken(IToken token, IToken previousToken) {
319                ETokenType previousType = null;
320                if (previousToken != null) {
321                        previousType = previousToken.getType();
322                }
323                ETokenType type = token.getType();
324
325                if (previousType == AND && type == THEN) {
326                        return true;
327                }
328                if (previousType == OR && type == ELSE) {
329                        return true;
330                }
331                return super.isFilteredToken(token, previousToken);
332        }
333}