001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.shallowparser.languages.cs;
018
019import static eu.cqse.check.framework.scanner.ETokenType.*;
020import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_EXPRESSION;
021import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_METHOD;
022import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_MODULE;
023import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_TYPE;
024import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.TOP_LEVEL;
025
026import java.util.EnumSet;
027
028import org.conqat.lib.commons.region.Region;
029
030import eu.cqse.check.framework.scanner.ETokenType;
031import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
032import eu.cqse.check.framework.scanner.IToken;
033import eu.cqse.check.framework.shallowparser.SubTypeNames;
034import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
035import eu.cqse.check.framework.shallowparser.framework.PropertyAccessNameResolver;
036import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
037import eu.cqse.check.framework.shallowparser.languages.base.CStyleShallowParserBase;
038import eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates;
039
040/**
041 * Shallow parser for C#
042 * <p>
 * What this parser does and does not do:
044 * <ul>
045 * <li>The parser recognizes types (classes, enums, interfaces), methods and
046 * attributes, and individual statements.</li>
047 * <li>It recognizes the nesting of statements (e.g. in loops), but does not
048 * parse into the statements. For example, it recognizes an if-statement and
049 * provides the list of sub-statements, but does not provide direct access to
050 * the if-condition.</li>
051 * <li>Using statements and annotations are parsed as meta information.</li>
052 * </ul>
053 */
054public class CsShallowParser extends CStyleShallowParserBase {
055
056        /**
057         * A set of all token types that can be used as valid identifiers. See
058         * http://msdn.microsoft.com/en-us/library/x53a06bb.aspx for the full list.
059         */
060        public static final EnumSet<ETokenType> VALID_IDENTIFIERS = EnumSet.of(IDENTIFIER, ADD, ALIAS, ASCENDING, ASYNC,
061                        AWAIT, DESCENDING, DYNAMIC, FROM, GET, GLOBAL, GROUP, INTO, JOIN, LET, ORDERBY, PARTIAL, REMOVE, SELECT,
062                        SET, VALUE, VAR, WHERE, YIELD);
063
064        /** All primitive types. */
065        private static final EnumSet<ETokenType> PRIMITIVE_TYPES = EnumSet.of(VOID, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE,
066                        CHAR, BOOL, STRING, OBJECT, DECIMAL, SBYTE, USHORT, UINT, ULONG);
067
068        /** {@inheritDoc} */
069        @Override
070        protected void createMetaRules() {
071                // using
072                inState(TOP_LEVEL, IN_MODULE).markStart().sequence(USING).optional(STATIC).markStart().skipTo(SEMICOLON)
073                                .createNode(EShallowEntityType.META, SubTypeNames.USING, new Region(0, -2)).endNode();
074
075                // annotations
076                inState(IN_TYPE, IN_MODULE, TOP_LEVEL).sequence(LBRACK).createNode(EShallowEntityType.META, "annotation", 1)
077                                .skipToWithNesting(RBRACK, LBRACK, RBRACK).endNode();
078
079                // preprocessor stuff
080                inAnyState().sequence(PREPROCESSOR_DIRECTIVE).createNode(EShallowEntityType.META, 0).endNode();
081
082                super.createMetaRules();
083        }
084
085        /** {@inheritDoc} */
086        @Override
087        protected void createTypeRules() {
088                // namespace
089                inState(TOP_LEVEL, IN_MODULE).sequence(NAMESPACE, getValidIdentifiers()).skipTo(LBRACE)
090                                .createNode(EShallowEntityType.MODULE, 0, new Region(1, -2)).parseUntil(IN_MODULE).sequence(RBRACE)
091                                .endNode();
092
093                super.createTypeRules();
094        }
095
096        /** {@inheritDoc} */
097        @Override
098        protected EnumSet<ETokenType> getTypeKeywords() {
099                return EnumSet.of(CLASS, INTERFACE, ENUM, STRUCT);
100        }
101
102        /** {@inheritDoc} */
103        @Override
104        protected EnumSet<ETokenType> getTypeModifier() {
105                return EnumSet.of(PUBLIC, PRIVATE, ABSTRACT, SEALED, INTERNAL, PARTIAL, STATIC);
106        }
107
108        /** Returns both type and type member modifiers */
109        private EnumSet<ETokenType> getTypeAndMemberModifiers() {
110                EnumSet<ETokenType> allModifiers = getTypeModifier();
111                allModifiers.addAll(
112                                EnumSet.of(PROTECTED, VIRTUAL, ASYNC, CONST, EVENT, EXTERN, OVERRIDE, READONLY, UNSAFE, VOLATILE, NEW));
113                return allModifiers;
114        }
115
116        /** Returns modifiers that are applicable to events */
117        private static EnumSet<ETokenType> getEventModifiers() {
118                return EnumSet.of(PRIVATE, PROTECTED, PUBLIC, INTERNAL, STATIC, VIRTUAL, SEALED, ABSTRACT);
119        }
120
121        /** {@inheritDoc} */
122        @Override
123        protected void createClassElementsRules() {
124                // simple enum literals
125                inState(IN_TYPE).sequence(IDENTIFIER).sequenceBefore(EnumSet.of(COMMA, EQ, RBRACE))
126                                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ENUM_LITERAL, 0)
127                                .skipBefore(EnumSet.of(COMMA, RBRACE)).optional(COMMA).endNode();
128
129                // delegates
130                typePattern(inState(TOP_LEVEL, IN_MODULE, IN_TYPE).sequence(DELEGATE)).sequence(getValidIdentifiers(), LPAREN)
131                                .createNode(EShallowEntityType.METHOD, 0, -2).skipTo(RPAREN).skipTo(SEMICOLON).endNode();
132
133                createMethodRules();
134
135                // event rules should be in front of properties, since otherwise properties
136                // rules will catch events as well
137                createEventsRules();
138                createPropertiesRules();
139
140                // attributes, e.g., fields (must be after method, as this would also
141                // match methods)
142                typePatternInState(IN_TYPE).sequence(getValidIdentifiers())
143                                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ATTRIBUTE, -1)
144                                .skipToWithNesting(SEMICOLON, LBRACE, RBRACE, getSubExpressionRecognizer()).endNode();
145
146                // static initializer, get/set for properties, add/remove in events
147                inState(IN_TYPE).sequence(EnumSet.of(GET, SET), SEMICOLON).createNode(EShallowEntityType.METHOD,
148                                new Object[] { "empty", 0 }, new PropertyAccessNameResolver<EGenericParserStates>()).endNode();
149                inState(IN_TYPE).sequence(EnumSet.of(GET, SET), LBRACE)
150                                .createNode(EShallowEntityType.METHOD, 0, new PropertyAccessNameResolver<EGenericParserStates>())
151                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
152                inState(IN_TYPE).sequence(EnumSet.of(GET, SET), DOUBLE_ARROW)
153                                .createNode(EShallowEntityType.METHOD, 0, new PropertyAccessNameResolver<EGenericParserStates>())
154                                .parseOnce(IN_METHOD).endNode();
155                inState(IN_TYPE).sequence(EnumSet.of(STATIC, ADD, REMOVE), LBRACE).createNode(EShallowEntityType.METHOD, 0)
156                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
157        }
158
159        /** Creates the rules for parsing events. */
160        private void createEventsRules() {
161                RecognizerBase<EGenericParserStates> eventRecognizer = inState(TOP_LEVEL, IN_MODULE, IN_TYPE)
162                                .repeated(getEventModifiers()).markStart().sequence(EVENT).sequence(getValidIdentifiers())
163                                .skipNested(LT, GT).repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer())
164                                .sequence(getValidIdentifiers()).createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.EVENT, -1);
165                eventRecognizer.sequence(LBRACE).parseUntil(IN_TYPE).sequence(RBRACE).endNode();
166                eventRecognizer.skipTo(SEMICOLON).endNode();
167        }
168
169        /** Creates the rules for parsing properties. */
170        private void createPropertiesRules() {
171                RecognizerBase<EGenericParserStates> alternatives = typePatternInState(IN_TYPE)
172                                .repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer()).sequence(getValidIdentifiers());
173
174                alternatives.sequence(DOUBLE_ARROW).createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.PROPERTY, -2)
175                                .parseOnce(IN_METHOD).endNode();
176
177                RecognizerBase<EGenericParserStates> braceAlternatives = alternatives.sequence(LBRACE)
178                                .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.PROPERTY, -2).parseUntil(IN_TYPE)
179                                .sequence(RBRACE);
180                braceAlternatives.sequence(EQ).parseOnce(IN_METHOD).endNode();
181                braceAlternatives.endNode();
182        }
183
184        /**
185         * Creates the rules for all method-like constructs inside types.
186         */
187        private void createMethodRules() {
188                // indexers
189                completeMethod("indexer", EShallowEntityType.ATTRIBUTE, IN_TYPE,
190                                typePatternInState(IN_TYPE).repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer())
191                                                .markStart()
192                                                // could contain attributes
193                                                .sequence(THIS, LBRACK).skipToWithNesting(RBRACK, LBRACK, RBRACK));
194
195                // operator overloading
196                createOperatorOverloadingRules();
197
198                // methods
199                createMemberMethodRules();
200
201                // constructor
202                finishConstructorLike(inState(IN_TYPE).repeated(EnumSet.of(PRIVATE, PROTECTED, PUBLIC, INTERNAL)).markStart(),
203                                SubTypeNames.CONSTRUCTOR);
204
205                // static constructor
206                finishConstructorLike(inState(IN_TYPE).sequence(STATIC).markStart(), SubTypeNames.STATIC_CONSTRUCTOR);
207
208                // destructor
209                finishConstructorLike(
210                                inState(IN_TYPE).repeated(EnumSet.of(PRIVATE, PROTECTED, PUBLIC, INTERNAL)).sequence(COMP).markStart(),
211                                SubTypeNames.DESTRUCTOR);
212        }
213
214        /** Rules for member methods */
215        private void createMemberMethodRules() {
216                // we have to skip the parameter list with nesting because of tuples (C#7)
217                completeMethod("method", EShallowEntityType.METHOD, IN_METHOD,
218                                typePatternInState(IN_TYPE).repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer())
219                                                .markStart().sequence(getValidIdentifiers()).skipNested(LT, GT).sequence(LPAREN)
220                                                .skipToWithNesting(RPAREN, LPAREN, RPAREN));
221
222                // rule that recognizes methods with tuples as return type (C#7)
223                completeMethod("method", EShallowEntityType.METHOD, IN_METHOD,
224                                inState(IN_TYPE).repeated(getTypeAndMemberModifiers()).sequence(LPAREN)
225                                                .skipToWithNesting(RPAREN, LPAREN, RPAREN)
226                                                .repeatedSubRecognizer(createExplicitInterfaceQualifierRecognizer()).markStart()
227                                                .sequence(getValidIdentifiers()).skipNested(LT, GT).sequence(LPAREN)
228                                                .skipToWithNesting(RPAREN, LPAREN, RPAREN));
229        }
230
231        /** Rules for operator overloading. */
232        private void createOperatorOverloadingRules() {
233                EnumSet<ETokenType> primitiveOrIdentifier = EnumSet.copyOf(PRIMITIVE_TYPES);
234                primitiveOrIdentifier.add(IDENTIFIER);
235
236                completeMethod(SubTypeNames.OPERATOR, EShallowEntityType.METHOD, IN_METHOD,
237                                inState(IN_TYPE).repeated(EnumSet.of(PRIVATE, PROTECTED, PUBLIC, INTERNAL, STATIC))
238                                                .sequence(EnumSet.of(IMPLICIT, EXPLICIT), OPERATOR).markStart()
239                                                .sequence(primitiveOrIdentifier, LPAREN).skipToWithNesting(RPAREN, LPAREN, RPAREN));
240                completeMethod(SubTypeNames.OPERATOR, EShallowEntityType.METHOD, IN_METHOD,
241                                typePatternInState(IN_TYPE).sequence(OPERATOR).markStart()
242                                                .sequence(EnumSet.of(ETokenClass.OPERATOR, ETokenClass.KEYWORD), LPAREN)
243                                                .skipToWithNesting(RPAREN, LPAREN, RPAREN));
244        }
245
246        /**
247         * Finishes a recognizer that begins parsing a constructor like method. Those
248         * are constructors, static constructors and destructors. The corresponding
249         * subtype must be passed.
250         */
251        private void finishConstructorLike(RecognizerBase<EGenericParserStates> recognizer, String subtype) {
252                RecognizerBase<EGenericParserStates> alternative = recognizer.sequence(getValidIdentifiers(), LPAREN)
253                                .skipTo(RPAREN).skipBeforeWithNesting(EnumSet.of(LBRACE, DOUBLE_ARROW), LPAREN, RPAREN);
254
255                RecognizerBase<EGenericParserStates> lambdaAlternative = alternative.sequence(DOUBLE_ARROW)
256                                .createNode(EShallowEntityType.METHOD, subtype, 0);
257                lambdaAlternative.sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE).endNode();
258                lambdaAlternative.parseOnce(IN_METHOD).endNode();
259
260                alternative.sequence(LBRACE).createNode(EShallowEntityType.METHOD, subtype, 0).parseUntil(IN_METHOD)
261                                .sequence(RBRACE).endNode();
262        }
263
264        /**
265         * Creates a new recognizer that can match an explicit interface qualifier
266         * prefix for a method-like construct. This includes sequences of identifiers
267         * with dots, possibly intermixed with template arguments.
268         */
269        private RecognizerBase<EGenericParserStates> createExplicitInterfaceQualifierRecognizer() {
270                // remember the start of the recognizer chain (we can not use the
271                // result of the method chain, as this would be the last recognizer)
272                return createRecognizer(start -> start.sequence(getValidIdentifiers()).skipNested(LT, GT).sequence(DOT));
273        }
274
275        /**
276         * Completes a method-like construct. This begins with searching for the first
277         * semicolon or brace, i.e., the parameter list should already be skipped. This
278         * ends either in a complete method with a body, or with a semicolon and thus is
279         * just an abstract method.
280         */
281        private static void completeMethod(String name, EShallowEntityType nodeType, EGenericParserStates subParseState,
282                        RecognizerBase<EGenericParserStates> start) {
283                RecognizerBase<EGenericParserStates> alternative = start
284                                .skipBefore(EnumSet.of(LBRACE, SEMICOLON, DOUBLE_ARROW));
285
286                // for lambdas, we always parse IN_METHOD and ignore the subParseState
287                RecognizerBase<EGenericParserStates> lambdaAlternative = alternative.sequence(DOUBLE_ARROW).createNode(nodeType,
288                                name, 0);
289                lambdaAlternative.sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE).endNode();
290                lambdaAlternative.parseOnce(IN_METHOD).endNode();
291
292                alternative.sequence(LBRACE).createNode(nodeType, name, 0).parseUntil(subParseState).sequence(RBRACE).endNode();
293                alternative.sequence(SEMICOLON).createNode(nodeType, "abstract " + name, 0).endNode();
294        }
295
        /**
         * {@inheritDoc}
         * <p>
         * The rules for local functions are registered in front of the statement
         * rules of the base class.
         */
        @Override
        protected void createStatementRules() {
                createLocalFunctionRules();
                super.createStatementRules();
        }
301
302        /**
303         * Creates rules for handling of local functions.
304         * 
305         * @see <a href=
306         *      "https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/local-functions">Local
307         *      functions (C# Programming Guide)</a>
308         */
309        private void createLocalFunctionRules() {
310                EnumSet<ETokenType> typeStart = EnumSet.copyOf(PRIMITIVE_TYPES);
311                typeStart.addAll(getValidIdentifiers());
312                inState(IN_METHOD)
313                                // method declarations do never start with AWAIT. That is a function call (e.g.,
314                                // await foo(); )
315                                .notPreCondition(createRecognizer(start -> start.sequence(ETokenType.AWAIT)))
316                                .optional(EnumSet.of(ASYNC, UNSAFE)).sequence(typeStart).optional(QUESTION).repeated(DOT, typeStart)
317                                .skipNested(LT, GT).repeated(MULT).repeatedSubRecognizer(new ArrayBracketsRecognizer()).markStart()
318                                .sequence(getValidIdentifiers(), LPAREN)
319                                .createNode(EShallowEntityType.METHOD, SubTypeNames.LOCAL_FUNCTION, 0).skipTo(LBRACE)
320                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
321
322                // tuple variant
323                inState(IN_METHOD).optional(EnumSet.of(ASYNC, UNSAFE)).sequence(LPAREN)
324                                .skipToWithNesting(RPAREN, LPAREN, RPAREN).markStart().sequence(getValidIdentifiers(), LPAREN)
325                                .createNode(EShallowEntityType.METHOD, SubTypeNames.LOCAL_FUNCTION, 0).skipTo(LBRACE)
326                                .parseUntil(IN_METHOD).sequence(RBRACE).endNode();
327
328        }
329
330        /** {@inheritDoc} */
331        @Override
332        protected void createCaseRule() {
333                super.createCaseRule();
334
335                // C# also allows any kind of constant expression as a case label, e.g:
336                // Foo.BAR + Foo.GOO << 12
337                inState(IN_METHOD).markStart().sequence(CASE).skipTo(COLON).createNode(EShallowEntityType.META, 0).endNode();
338        }
339
340        /**
341         * {@inheritDoc}
342         * <p>
343         * Also returns all contextual keywords, as they are valid identifiers in the
344         * language. See http://msdn.microsoft.com/en-us/library/x53a06bb.aspx for the
345         * full list.
346         */
347        @Override
348        protected EnumSet<ETokenType> getValidIdentifiers() {
349                return VALID_IDENTIFIERS;
350        }
351
352        /** {@inheritDoc} */
353        @Override
354        protected EnumSet<ETokenType> getSimpleBlockKeywordsWithParentheses() {
355                return EnumSet.of(WHILE, FOR, SWITCH, LOCK, USING, FIXED, FOREACH);
356        }
357
358        /** {@inheritDoc} */
359        @Override
360        protected EnumSet<ETokenType> getSimpleBlockKeywordsWithoutParentheses() {
361                return EnumSet.of(ELSE, FINALLY, CHECKED, UNCHECKED, UNSAFE);
362        }
363
364        /** {@inheritDoc} */
365        @Override
366        protected EnumSet<ETokenType> getStatementStartTokens() {
367                // literals are necessary for statements within arrow methods
368                return EnumSet.of(NEW, BREAK, CONTINUE, RETURN, ASSERT, CONST, GOTO, BASE, THROW, THIS, CHECKED, SIZEOF,
369                                STACKALLOC, TYPEOF, VALUE, YIELD, LPAREN, PLUSPLUS, MINUSMINUS, NOT, PLUS, MINUS, COMP, TRUE, FALSE,
370                                INTEGER_LITERAL, FLOATING_POINT_LITERAL, STRING_LITERAL, IDENTIFIER);
371        }
372
373        /** {@inheritDoc} */
374        @Override
375        protected RecognizerBase<EGenericParserStates> typePattern(RecognizerBase<EGenericParserStates> currentState) {
376                EnumSet<ETokenType> modifierKeywords = getTypeAndMemberModifiers();
377                EnumSet<ETokenType> typeStart = EnumSet.copyOf(PRIMITIVE_TYPES);
378                typeStart.addAll(getValidIdentifiers());
379
380                // we include "?" in the skipping section to deal with nullable types
381                // (e.g. Foo? foo;)
382                // the repeated (DOT, typeStart) is used for full qualified type names
383                return currentState.repeated(modifierKeywords).sequence(typeStart).repeated(DOT, typeStart).skipNested(LT, GT)
384                                .repeated(EnumSet.of(QUESTION, MULT)).repeatedSubRecognizer(new ArrayBracketsRecognizer());
385        }
386
387        /** {@inheritDoc} */
388        @Override
389        protected void createSubExpressionRules() {
390                // anonymous delegate methods
391                inState(IN_EXPRESSION).sequence(DELEGATE, LPAREN)
392                                .createNode(EShallowEntityType.METHOD, SubTypeNames.ANONYMOUS_METHOD)
393                                .skipToWithNesting(RPAREN, LPAREN, RPAREN).sequence(LBRACE).parseUntil(IN_METHOD).sequence(RBRACE)
394                                .endNode();
395
396                createLambdaWithArrowRules(DOUBLE_ARROW);
397        }
398
399        /** {@inheritDoc} */
400        @Override
401        protected RecognizerBase<EGenericParserStates> getSubExpressionRecognizer() {
402                return new CsDelegateAndLambdaRecognizer();
403        }
404
405        /** {@inheritDoc} */
406        @Override
407        protected boolean isFilteredToken(IToken token, IToken previousToken) {
408                return super.isFilteredToken(token, previousToken) || isPragmaWarning(token);
409        }
410
411        private static boolean isPragmaWarning(IToken token) {
412                return token.getType() == ETokenType.PREPROCESSOR_DIRECTIVE && token.getText().startsWith("#pragma warning");
413        }
414
415}