/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright 2005-2011 the ConQAT Project                                   |
|                                                                          |
| Licensed under the Apache License, Version 2.0 (the "License");          |
| you may not use this file except in compliance with the License.         |
| You may obtain a copy of the License at                                  |
|                                                                          |
|    http://www.apache.org/licenses/LICENSE-2.0                            |
|                                                                          |
| Unless required by applicable law or agreed to in writing, software      |
| distributed under the License is distributed on an "AS IS" BASIS,        |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and      |
| limitations under the License.                                           |
+-------------------------------------------------------------------------*/
package eu.cqse.check.framework.shallowparser.languages.java;

import static eu.cqse.check.framework.scanner.ETokenType.ABSTRACT;
import static eu.cqse.check.framework.scanner.ETokenType.ANNOTATION_INTERFACE;
import static eu.cqse.check.framework.scanner.ETokenType.ARROW;
import static eu.cqse.check.framework.scanner.ETokenType.ASSERT;
import static eu.cqse.check.framework.scanner.ETokenType.AT_OPERATOR;
import static eu.cqse.check.framework.scanner.ETokenType.BOOLEAN;
import static eu.cqse.check.framework.scanner.ETokenType.BREAK;
import static eu.cqse.check.framework.scanner.ETokenType.BYTE;
import static eu.cqse.check.framework.scanner.ETokenType.CASE;
import static eu.cqse.check.framework.scanner.ETokenType.CHAR;
import static eu.cqse.check.framework.scanner.ETokenType.CLASS;
import static eu.cqse.check.framework.scanner.ETokenType.COLON;
import static eu.cqse.check.framework.scanner.ETokenType.COMMA;
import static eu.cqse.check.framework.scanner.ETokenType.CONTINUE;
import static eu.cqse.check.framework.scanner.ETokenType.DEFAULT;
import static eu.cqse.check.framework.scanner.ETokenType.DOT;
import static eu.cqse.check.framework.scanner.ETokenType.DOUBLE;
import static eu.cqse.check.framework.scanner.ETokenType.ELSE;
import static eu.cqse.check.framework.scanner.ETokenType.ENUM;
import static eu.cqse.check.framework.scanner.ETokenType.FINAL;
import static eu.cqse.check.framework.scanner.ETokenType.FINALLY;
import static eu.cqse.check.framework.scanner.ETokenType.FLOAT;
import static eu.cqse.check.framework.scanner.ETokenType.FOR;
import static eu.cqse.check.framework.scanner.ETokenType.GOTO;
import static eu.cqse.check.framework.scanner.ETokenType.GT;
import static eu.cqse.check.framework.scanner.ETokenType.IDENTIFIER;
import static eu.cqse.check.framework.scanner.ETokenType.IMPORT;
import static eu.cqse.check.framework.scanner.ETokenType.INT;
import static eu.cqse.check.framework.scanner.ETokenType.INTERFACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.LBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.LONG;
import static eu.cqse.check.framework.scanner.ETokenType.LPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.LT;
import static eu.cqse.check.framework.scanner.ETokenType.MINUSMINUS;
import static eu.cqse.check.framework.scanner.ETokenType.NATIVE;
import static eu.cqse.check.framework.scanner.ETokenType.NEW;
import static eu.cqse.check.framework.scanner.ETokenType.PACKAGE;
import static eu.cqse.check.framework.scanner.ETokenType.PLUSPLUS;
import static eu.cqse.check.framework.scanner.ETokenType.PRIVATE;
import static eu.cqse.check.framework.scanner.ETokenType.PROTECTED;
import static eu.cqse.check.framework.scanner.ETokenType.PUBLIC;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACE;
import static eu.cqse.check.framework.scanner.ETokenType.RBRACK;
import static eu.cqse.check.framework.scanner.ETokenType.RETURN;
import static eu.cqse.check.framework.scanner.ETokenType.RPAREN;
import static eu.cqse.check.framework.scanner.ETokenType.SEMICOLON;
import static eu.cqse.check.framework.scanner.ETokenType.SHORT;
import static eu.cqse.check.framework.scanner.ETokenType.STATIC;
import static eu.cqse.check.framework.scanner.ETokenType.SUPER;
import static eu.cqse.check.framework.scanner.ETokenType.SWITCH;
import static eu.cqse.check.framework.scanner.ETokenType.SYNCHRONIZED;
import static eu.cqse.check.framework.scanner.ETokenType.THIS;
import static eu.cqse.check.framework.scanner.ETokenType.THROW;
import static eu.cqse.check.framework.scanner.ETokenType.TRANSIENT;
import static eu.cqse.check.framework.scanner.ETokenType.VOID;
import static eu.cqse.check.framework.scanner.ETokenType.VOLATILE;
import static eu.cqse.check.framework.scanner.ETokenType.WHILE;
import static eu.cqse.check.framework.shallowparser.SubTypeNames.ANNOTATION;
import static eu.cqse.check.framework.shallowparser.SubTypeNames.CONSTRUCTOR;
import static eu.cqse.check.framework.shallowparser.framework.EShallowEntityType.TYPE;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_ENUM;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_EXPRESSION;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_METHOD;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.IN_TYPE;
import static eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates.TOP_LEVEL;

import java.util.EnumSet;

import org.conqat.lib.commons.region.Region;

import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.shallowparser.SubTypeNames;
import eu.cqse.check.framework.shallowparser.framework.EShallowEntityType;
import eu.cqse.check.framework.shallowparser.framework.RecognizerBase;
094import eu.cqse.check.framework.shallowparser.languages.base.CStyleShallowParserBase; 095import eu.cqse.check.framework.shallowparser.languages.base.EGenericParserStates; 096 097/** 098 * Shallow parser for Java. 099 * <p> 100 * What this parser does and does not: 101 * <ul> 102 * <li>The parser recognizes types (classes, enums, interfaces), methods and 103 * attributes, and individual statements.</li> 104 * <li>It recognizes the nesting of statements (e.g. in loops), but does not 105 * parse into the statements. For example, it recognizes an if-statement and 106 * provides the list of sub-statements, but does not provide direct access to 107 * the if-condition.</li> 108 * <li>Import and package statements are parsed as meta information.</li> 109 * <li>Annotations are recognized as meta information, but only annotations at 110 * types and methods. Annotations at parameters are not parsed, as the parser 111 * does not parse into the parameter list of methods.</li> 112 * <li>The parser does not recognize anonymous classes. These are treated as a 113 * single long statement or attribute. Inner classes, however, are parsed 114 * correctly.</li> 115 * <li>The parser can deal with multiple classes in a single file.</li> 116 * </ul> 117 */ 118public class JavaShallowParser extends CStyleShallowParserBase { 119 120 /** {@inheritDoc} */ 121 @Override 122 protected void createMetaRules() { 123 // imports and package 124 inAnyState().sequence(IMPORT, STATIC).skipTo(SEMICOLON) 125 .createNode(EShallowEntityType.META, SubTypeNames.STATIC_IMPORT, new Region(2, -2)).endNode(); 126 inAnyState().sequence(EnumSet.of(IMPORT, PACKAGE)).skipTo(SEMICOLON) 127 .createNode(EShallowEntityType.META, 0, new Region(1, -2)).endNode(); 128 129 // annotations; the spec allows both whitespace and comments between the 130 // '@' and the identifier, but as we filter comments before-hand, this 131 // is not an issue. 
132 inAnyState().sequence(AT_OPERATOR, IDENTIFIER).repeated(DOT, IDENTIFIER) 133 .createNode(EShallowEntityType.META, ANNOTATION, new Region(1, -1)).skipNested(LPAREN, RPAREN) 134 .endNode(); 135 136 super.createMetaRules(); 137 } 138 139 @Override 140 protected void createTypeRules() { 141 createEnumTypeRule(); 142 super.createTypeRules(); 143 } 144 145 /** 146 * Enums, like classes, can be declared top-level or nested. At the head of the 147 * enum type are its enum literal declarations. This is implemented by the 148 * change of the parser to the <code>IN_ENUM</code> state. 149 */ 150 private void createEnumTypeRule() { 151 RecognizerBase<EGenericParserStates> typeNode = inState(TOP_LEVEL, IN_TYPE).repeated(getTypeModifier()) 152 .sequence(ENUM, IDENTIFIER).createNode(TYPE, SubTypeNames.ENUM, -1); 153 RecognizerBase<EGenericParserStates> inEnumRule = typeNode.skipTo(LBRACE).parseUntil(IN_ENUM); 154 inEnumRule.sequence(SEMICOLON).parseUntil(IN_TYPE).sequence(RBRACE).endNode(); 155 inEnumRule.sequence(RBRACE).endNode(); 156 } 157 158 /** {@inheritDoc} */ 159 @Override 160 protected EnumSet<ETokenType> getTypeModifier() { 161 return EnumSet.of(PUBLIC, PROTECTED, PRIVATE, ABSTRACT, STATIC, FINAL); 162 } 163 164 /** {@inheritDoc} */ 165 @Override 166 protected EnumSet<ETokenType> getTypeKeywords() { 167 return EnumSet.of(CLASS, INTERFACE, ANNOTATION_INTERFACE); 168 } 169 170 /** {@inheritDoc} */ 171 @Override 172 protected void createClassElementsRules() { 173 createMethodRule(); 174 175 // Enum literals 176 RecognizerBase<EGenericParserStates> enumLiteral = inState(IN_ENUM).markStart().sequence(IDENTIFIER) 177 .skipNested(LPAREN, RPAREN).sequenceBefore(EnumSet.of(SEMICOLON, COMMA, LBRACE, RBRACE)) 178 .createNode(EShallowEntityType.ATTRIBUTE, SubTypeNames.ENUM_LITERAL, 0); 179 enumLiteral.sequence(LBRACE).parseUntil(IN_TYPE).sequence(RBRACE).optional(COMMA).endNode(); 180 enumLiteral.optional(COMMA).endNode(); 181 182 createConstructorRule(); 183 184 // attributes 
(must be after method, as this would also match methods) 185 typePatternInState(IN_TYPE).sequence(IDENTIFIER).createNode(EShallowEntityType.ATTRIBUTE, "attribute", -1) 186 .skipToWithNesting(SEMICOLON, LBRACE, RBRACE, getSubExpressionRecognizer()).endNode(); 187 188 // static initializer 189 inState(IN_TYPE).sequence(STATIC, LBRACE).createNode(EShallowEntityType.METHOD, "static initializer", "<sinit>") 190 .parseUntil(IN_METHOD).sequence(RBRACE).endNode(); 191 192 // non-static initializer 193 inState(IN_TYPE).sequence(LBRACE).createNode(EShallowEntityType.METHOD, "non-static initializer", "<init>") 194 .parseUntil(IN_METHOD).sequence(RBRACE).endNode(); 195 } 196 197 /** Recognizes methods. */ 198 private void createMethodRule() { 199 inState(IN_TYPE).sequence(DEFAULT).skipBefore(IDENTIFIER, LPAREN).markStart().sequence(IDENTIFIER, LPAREN) 200 .skipToWithNesting(RPAREN, LPAREN, RPAREN).skipTo(LBRACE) 201 .createNode(EShallowEntityType.METHOD, "default method", 0).parseUntil(IN_METHOD).sequence(RBRACE) 202 .endNode(); 203 204 RecognizerBase<EGenericParserStates> methodAlternative = typePatternInState(IN_TYPE).markStart() 205 .sequence(IDENTIFIER, LPAREN).skipToWithNesting(RPAREN, LPAREN, RPAREN) 206 .skipBefore(EnumSet.of(LBRACE, SEMICOLON, DEFAULT)); 207 methodAlternative.sequence(LBRACE).createNode(EShallowEntityType.METHOD, "method", 0).parseUntil(IN_METHOD) 208 .sequence(RBRACE).endNode(); 209 // annotations 210 methodAlternative.sequence(DEFAULT).createNode(EShallowEntityType.METHOD, SubTypeNames.ABSTRACT, 0) 211 .skipToWithNesting(SEMICOLON, LBRACE, RBRACE).endNode(); 212 methodAlternative.sequence(SEMICOLON).createNode(EShallowEntityType.METHOD, SubTypeNames.ABSTRACT, 0).endNode(); 213 } 214 215 /** 216 * Recognizes constructors. 
Enum literals are recognized in the 217 * {@link EGenericParserStates#IN_ENUM} state 218 */ 219 private void createConstructorRule() { 220 inState(IN_TYPE).optional(EnumSet.of(PUBLIC, PRIVATE, PROTECTED)).markStart().sequence(IDENTIFIER, LPAREN) 221 .skipToWithNesting(RPAREN, LPAREN, RPAREN).skipTo(LBRACE) 222 .createNode(EShallowEntityType.METHOD, CONSTRUCTOR, 0).parseUntil(IN_METHOD).sequence(RBRACE).endNode(); 223 } 224 225 /** {@inheritDoc} */ 226 @Override 227 protected EnumSet<ETokenType> getSimpleBlockKeywordsWithParentheses() { 228 return EnumSet.of(WHILE, FOR, SWITCH, SYNCHRONIZED); 229 } 230 231 /** {@inheritDoc} */ 232 @Override 233 protected EnumSet<ETokenType> getSimpleBlockKeywordsWithoutParentheses() { 234 return EnumSet.of(ELSE, FINALLY); 235 } 236 237 /** {@inheritDoc} */ 238 @Override 239 protected EnumSet<ETokenType> getStatementStartTokens() { 240 return EnumSet.of(NEW, BREAK, CONTINUE, RETURN, ASSERT, FINAL, GOTO, SUPER, THIS, THROW, LPAREN, PLUSPLUS, 241 MINUSMINUS, IDENTIFIER); 242 } 243 244 /** {@inheritDoc} */ 245 @Override 246 protected void createCaseRule() { 247 super.createCaseRule(); 248 249 // Java also allows fully/partially qualified constants 250 inState(IN_METHOD).markStart().sequence(CASE).repeated(IDENTIFIER, DOT).sequence(IDENTIFIER, COLON) 251 .createNode(EShallowEntityType.META, 0, new Region(1, -2)).endNode(); 252 } 253 254 /** {@inheritDoc} */ 255 @Override 256 protected RecognizerBase<EGenericParserStates> typePattern(RecognizerBase<EGenericParserStates> currentState) { 257 EnumSet<ETokenType> modifierKeywords = EnumSet.of(STATIC, FINAL, PRIVATE, PROTECTED, PUBLIC, ABSTRACT, NATIVE, 258 SYNCHRONIZED, TRANSIENT, VOLATILE, DEFAULT); 259 EnumSet<ETokenType> typeNames = EnumSet.of(IDENTIFIER, VOID, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, CHAR, 260 BOOLEAN); 261 262 RecognizerBase<EGenericParserStates> annotationSubRecognizer = createRecognizer( 263 start -> start.sequence(AT_OPERATOR, IDENTIFIER).repeated(DOT, IDENTIFIER)); 264 
265 return currentState.repeated(modifierKeywords).skipNested(LT, GT).repeatedSubRecognizer(annotationSubRecognizer) 266 .repeated(IDENTIFIER, DOT).optional(THIS, DOT).sequence(typeNames).skipNested(LT, GT) 267 .repeatedSubRecognizer(annotationSubRecognizer).skipAny(EnumSet.of(LBRACK, RBRACK)); 268 } 269 270 /** {@inheritDoc} */ 271 @Override 272 protected void createSubExpressionRules() { 273 inState(IN_EXPRESSION).sequence(NEW).markStart().repeated(IDENTIFIER, DOT).sequence(IDENTIFIER) 274 .createNode(EShallowEntityType.TYPE, SubTypeNames.ANONYMOUS_CLASS, new Region(0, -1)).skipNested(LT, GT) 275 .skipNested(LPAREN, RPAREN, getSubExpressionRecognizer()).sequence(LBRACE).parseUntil(IN_TYPE) 276 .sequence(RBRACE).endNode(); 277 278 createLambdaWithArrowRules(ARROW); 279 } 280 281 /** {@inheritDoc} */ 282 @Override 283 protected RecognizerBase<EGenericParserStates> getSubExpressionRecognizer() { 284 return new JavaAnonymousClassAndLambdaRecognizer(); 285 } 286}