/*-------------------------------------------------------------------------+
|                                                                          |
| Copyright (c) 2009-2018 CQSE GmbH                                        |
|                                                                          |
+-------------------------------------------------------------------------*/
package eu.cqse.check.base;

import java.util.EnumSet;
import java.util.List;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.conqat.lib.commons.string.StringUtils;

import eu.cqse.check.framework.core.CheckException;
import eu.cqse.check.framework.core.CheckImplementationBase;
import eu.cqse.check.framework.core.ECheckParameter;
import eu.cqse.check.framework.core.phase.ECodeViewOption;
import eu.cqse.check.framework.core.util.CheckUtils;
import eu.cqse.check.framework.scanner.ETokenType;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.check.framework.shallowparser.framework.ShallowEntity;

/**
 * Base class for checks on hard-coded character literals. Subclasses check
 * literals which match {@link #getLiteralRegex()} (if specified) and which
 * occur in context with an identifier (e.g. assignment, comparison). The
 * identifier is checked with {@link #isMatchingIdentifier(IToken)}.
 *
 * {@link #findAllMatchingLiteral()} can be overridden if all literals matching
 * {@link #getLiteralRegex()} should produce a finding, regardless of the
 * context in which they occur.
 *
 * The check works on pre-processed tokens, thus implementing checks must have
 * the parameter {@link ECheckParameter#ABSTRACT_SYNTAX_TREE} enabled.
 */
public abstract class HardCodedLiteralsCheckBase extends CheckImplementationBase {

	/**
	 * ABAP specific field.
	 *
	 * The four boolean operator token types.
	 */
	private static final EnumSet<ETokenType> BOOLEAN_OPERATORS = EnumSet.of(ETokenType.AND, ETokenType.OR,
			ETokenType.NOT, ETokenType.EQUIV);

	/**
	 * ABAP specific field.
	 *
	 * Token types of the keywords which introduce data definitions where literals
	 * may be specified using the VALUE addition.
	 */
	private static final EnumSet<ETokenType> DATA_VALUE_DEFINITIONS = EnumSet.of(ETokenType.CLASS_DATA,
			ETokenType.CONSTANTS, ETokenType.DATA, ETokenType.STATICS, ETokenType.TYPES);

	/**
	 * Tokens of the entity currently being processed, as returned by
	 * {@link ShallowEntity#includedTokens()} on the filtered, pre-processed AST.
	 */
	protected List<IToken> preprocessedTokens;

	/**
	 * Pattern which literals must match; if <code>null</code> any literal
	 * matches.
	 */
	private Pattern literalPattern;

	/** {@inheritDoc} */
	@Override
	public void execute() throws CheckException {
		buildLiteralPattern();

		// pre-processed tokens are only available from the AST
		for (ShallowEntity entity : context.getAbstractSyntaxTree(ECodeViewOption.FILTERED_PREPROCESSED)) {

			preprocessedTokens = entity.includedTokens();

			// Collected up front because checkCharacterLiteral throws a checked
			// exception and hence cannot be called from within the stream.
			List<IToken> characterLiterals = preprocessedTokens.stream()
					.filter(token -> CheckUtils.STRING_LITERALS.contains(token.getType())).collect(Collectors.toList());

			for (IToken literal : characterLiterals) {
				checkCharacterLiteral(literal);
			}
		}
	}

	/**
	 * Builds {@link #literalPattern} from {@link #getLiteralRegex()}. If the regex
	 * is empty (as determined by {@link StringUtils#isEmpty(String)}), the pattern
	 * is set to <code>null</code>, i.e. every literal matches.
	 *
	 * @throws java.util.regex.PatternSyntaxException
	 *             if the regex returned by the subclass is not a valid pattern
	 */
	private void buildLiteralPattern() {
		String regex = getLiteralRegex();
		if (StringUtils.isEmpty(regex)) {
			literalPattern = null;
			return;
		}
		literalPattern = Pattern.compile(regex);
	}

	/**
	 * Checks the given token, which should be a character/string literal, and
	 * creates a finding if the literal matches and either all matching literals
	 * are reported or a matching identifier is found in its context.
	 */
	private void checkCharacterLiteral(IToken literal) throws CheckException {
		if (!isMatchingLiteral(literal)) {
			return;
		}

		if (findAllMatchingLiteral()) {
			createFindingForLiteral(literal);
			return;
		}

		Optional<IToken> identifier = lookupIdentifier(literal);

		if (identifier.isPresent() && isMatchingIdentifier(identifier.get())) {
			createFindingForLiteral(literal);
		}
	}

	/**
	 * Looks up the identifier corresponding to the given literal. Delegates to the
	 * language specific lookup first and then falls back to the generic operation
	 * lookup (left-hand side first, then right-hand side).
	 *
	 * @return An Optional holding the IToken of the identifier, empty optional if
	 *         no identifier found.
	 */
	private Optional<IToken> lookupIdentifier(IToken literal) throws CheckException {
		Optional<IToken> identifier = languageSpecificLookup(literal);

		if (!identifier.isPresent()) {
			identifier = lookupIdentifierIfInOperation(literal, true);
		}
		if (!identifier.isPresent()) {
			identifier = lookupIdentifierIfInOperation(literal, false);
		}

		return identifier;
	}

	/**
	 * Differentiates between languages. Depending on the language of the literal,
	 * the language specific lookup function gets called.
	 *
	 * @return An Optional holding the IToken of the identifier, empty optional if
	 *         no identifier found.
	 * @throws CheckException
	 *             if the language of the literal is not supported
	 */
	private Optional<IToken> languageSpecificLookup(IToken literal) throws CheckException {
		Optional<IToken> identifier;
		/*
		 * The fall-through in this switch statement is intended to explicitly state
		 * which languages are supported and which methods correspond to which
		 * language.
		 */
		switch (literal.getLanguage()) {
		case ABAP:
			identifier = lookupIdentifierIfInCaseWhen(literal);
			if (identifier.isPresent()) {
				return identifier;
			}
			// fall through: ABAP also uses the definition lookup
		case VB:
			return lookupIdentifierIfInDefinition(literal);
		case CS:
			identifier = lookupIdentifierIfInProperty(literal);
			if (identifier.isPresent()) {
				return identifier;
			}
			// fall through: C# also uses the new-statement lookup
		case JAVA:
		case CPP:
		case GOSU:
		case JAVASCRIPT:
			return lookupIdentifierIfInNewStatement(literal);
		case KOTLIN:
		case GROOVY:
		case XTEND:
		case SWIFT:
		case PHP:
		case PYTHON:
		case RUST:
			// supported, but without a language specific lookup
			return Optional.empty();
		default:
			throw new CheckException("Language " + literal.getLanguage() + " of " + literal + " not supported.");
		}
	}

	/**
	 * Creates a finding for the given literal. The message consists of
	 * {@link #getFindingsMessageText()} followed by the literal text.
	 */
	private void createFindingForLiteral(IToken literal) throws CheckException {
		// Replace single quotes with backticks for better markdown support.
		createFinding(getFindingsMessageText() + " " + literal.getText().replace('\'', '`'), literal);
	}

	/**
	 * @return <code>true</code> if the literal matches and should be checked. If
	 *         no pattern is configured, every literal matches; otherwise the
	 *         unquoted literal text must match the pattern completely.
	 */
	protected boolean isMatchingLiteral(IToken literal) {
		if (literalPattern == null) {
			return true;
		}
		return literalPattern.matcher(CheckUtils.getUnquotedTextForCharacterLiteral(literal)).matches();
	}

	/**
	 * Indicates if any literal matching {@link #getLiteralRegex()} should be
	 * reported. Default implementation is <code>false</code>.
	 */
	protected boolean findAllMatchingLiteral() {
		return false;
	}

	/**
	 * Performs a lookup for the identifier, if the given literal is stated within
	 * an operation where an identifier is stated on the other side of the
	 * operator.
	 *
	 * For a left-hand lookup, if the token directly beyond the operator does not
	 * qualify, the token two positions further to the left is tried as well.
	 *
	 * @param leftHandLookup
	 *            determines if the identifier should be searched on the left side,
	 *            otherwise it is expected on the right side of the operator.
	 * @return An Optional holding the IToken of the identifier, empty optional if
	 *         no identifier found.
	 */
	protected Optional<IToken> lookupIdentifierIfInOperation(IToken literal, boolean leftHandLookup) {
		int literalIndex = preprocessedTokens.indexOf(literal);
		if (literalIndex < 0) {
			// literal is not part of the current token list, nothing to look at
			return Optional.empty();
		}

		int lookupDirection = leftHandLookup ? -1 : 1;
		int operatorIndex = literalIndex + lookupDirection;
		int identifierIndex = operatorIndex + lookupDirection;
		if (isFieldSymbolAt(identifierIndex)) {
			// skip the bracket of the field symbol
			identifierIndex += lookupDirection;
		}

		// operatorIndex lies between literalIndex and identifierIndex, so it is
		// in range whenever identifierIndex is.
		if (identifierIndex >= 0 && identifierIndex < preprocessedTokens.size()
				&& isOperatorOrLParan(preprocessedTokens.get(operatorIndex))
				&& preprocessedTokens.get(identifierIndex).getType().isIdentifier()) {
			return Optional.of(preprocessedTokens.get(identifierIndex));
		}

		if (leftHandLookup && identifierIndex >= 2) {
			identifierIndex -= 2;

			if (preprocessedTokens.get(identifierIndex).getType().isIdentifier()) {
				return Optional.of(preprocessedTokens.get(identifierIndex));
			}
		}

		return Optional.empty();
	}

	/**
	 * ABAP specific function.
	 *
	 * Checks if the given token is either an operator (but not a boolean operator)
	 * or a left parenthesis. The check for the left parenthesis is added to match
	 * functional method calls with only one unnamed parameter. Then the method name
	 * is used as identifier.
	 */
	private static boolean isOperatorOrLParan(IToken token) {
		ETokenType tokenType = token.getType();
		if (tokenType == ETokenType.LPAREN) {
			return true;
		}
		return tokenType.isOperator() && !BOOLEAN_OPERATORS.contains(tokenType);
	}

	/**
	 * ABAP specific function.
	 *
	 * Checks if the token at the given pos in {@link #preprocessedTokens} belongs
	 * to a field symbol, i.e. is enclosed in {@code <..>}. If the token at pos is
	 * itself one of the enclosing brackets, the neighboring token is inspected
	 * instead.
	 *
	 * @return <code>false</code> if pos (or the shifted position) has no token on
	 *         both sides, otherwise whether the surrounding tokens are
	 *         {@link ETokenType#LT} and {@link ETokenType#GT}.
	 */
	private boolean isFieldSymbolAt(int pos) {
		if (!hasTokenOnBothSides(pos)) {
			return false;
		}
		ETokenType tokenType = preprocessedTokens.get(pos).getType();
		if (tokenType == ETokenType.LT) {
			pos++;
		} else if (tokenType == ETokenType.GT) {
			pos--;
		}
		// The shift may have moved pos to the border of the token list.
		if (!hasTokenOnBothSides(pos)) {
			return false;
		}
		IToken tokenBefore = preprocessedTokens.get(pos - 1);
		IToken tokenAfter = preprocessedTokens.get(pos + 1);
		return tokenBefore.getType() == ETokenType.LT && tokenAfter.getType() == ETokenType.GT;
	}

	/**
	 * Returns whether both pos - 1 and pos + 1 are valid indices of
	 * {@link #preprocessedTokens}.
	 */
	private boolean hasTokenOnBothSides(int pos) {
		return pos >= 1 && pos < preprocessedTokens.size() - 1;
	}

	/**
	 * ABAP specific function.
	 *
	 * Performs a lookup for the identifier if the given literal occurs as a WHEN
	 * option inside a CASE/WHEN structure, i.e. is directly preceded by a WHEN
	 * token. The token list is searched backwards for the nearest CASE token.
	 *
	 * @return An Optional holding the IToken of the identifier which controls the
	 *         CASE/WHEN structure, empty optional if no identifier found.
	 */
	protected Optional<IToken> lookupIdentifierIfInCaseWhen(IToken literal) {
		int literalIndex = preprocessedTokens.indexOf(literal);
		if (literalIndex < 1 || preprocessedTokens.get(literalIndex - 1).getType() != ETokenType.WHEN) {
			return Optional.empty();
		}

		// The token at literalIndex - 1 is WHEN, so a CASE token can only be found
		// at a smaller index; currentIndex + 2 therefore never exceeds
		// literalIndex.
		for (int currentIndex = literalIndex - 1; currentIndex >= 0; currentIndex--) {
			if (preprocessedTokens.get(currentIndex).getType() != ETokenType.CASE) {
				continue;
			}
			IToken caseControlIdentifier = preprocessedTokens.get(currentIndex + 1);
			if (caseControlIdentifier.getType() == ETokenType.LOWER) {
				// field symbol: skip the opening bracket token
				caseControlIdentifier = preprocessedTokens.get(currentIndex + 2);
			}
			if (caseControlIdentifier.getType().isIdentifier()) {
				return Optional.of(caseControlIdentifier);
			}
			return Optional.empty();
		}
		return Optional.empty();
	}

	/**
	 * ABAP specific function.
	 *
	 * Performs a lookup for the identifier when a literal is assigned with the
	 * VALUE addition, i.e. is directly preceded by a VALUE token. The current
	 * statement is searched backwards for a data definition keyword at its start;
	 * the token following that keyword is returned.
	 *
	 * @return An Optional holding the IToken of the identifier, empty optional if
	 *         no identifier found.
	 */
	protected Optional<IToken> lookupIdentifierIfInDefinition(IToken literal) {
		int literalIndex = preprocessedTokens.indexOf(literal);
		if (literalIndex < 1 || preprocessedTokens.get(literalIndex - 1).getType() != ETokenType.VALUE) {
			return Optional.empty();
		}
		for (int currentIndex = literalIndex - 1; currentIndex >= 0; currentIndex--) {
			ETokenType currentTokenType = preprocessedTokens.get(currentIndex).getType();

			boolean isStartOfStatement = currentIndex == 0
					|| preprocessedTokens.get(currentIndex - 1).getType() == ETokenType.DOT;

			if (isStartOfStatement && DATA_VALUE_DEFINITIONS.contains(currentTokenType)) {
				// The definition keyword precedes the VALUE token, hence
				// currentIndex + 1 is a valid index.
				return Optional.of(preprocessedTokens.get(currentIndex + 1));
			}
			if (currentTokenType == ETokenType.DOT) {
				// reached the end of the previous statement
				return Optional.empty();
			}
		}
		return Optional.empty();
	}

	/**
	 * C# specific function.
	 *
	 * Performs a lookup for the identifier if the given literal is returned from
	 * a property, i.e. is directly preceded by a RETURN token. The token list is
	 * searched backwards for the nearest GET token; the token two positions before
	 * it is returned (presumably the property name followed by the opening brace).
	 *
	 * @return An Optional holding the IToken of the identifier of the property,
	 *         empty optional if no identifier found.
	 */
	protected Optional<IToken> lookupIdentifierIfInProperty(IToken literal) {
		int literalIndex = preprocessedTokens.indexOf(literal);
		if (literalIndex < 1 || preprocessedTokens.get(literalIndex - 1).getType() != ETokenType.RETURN) {
			return Optional.empty();
		}

		for (int currentIndex = literalIndex - 1; currentIndex >= 0; currentIndex--) {
			if (preprocessedTokens.get(currentIndex).getType() != ETokenType.GET) {
				continue;
			}
			if (currentIndex < 2) {
				// No room for a property name in front of GET.
				return Optional.empty();
			}
			return Optional.of(preprocessedTokens.get(currentIndex - 2));
		}
		return Optional.empty();
	}

	/**
	 * C-like languages specific function.
	 *
	 * Performs a lookup for the identifier if the given literal occurs in a NEW
	 * statement. The NEW token is expected three tokens before the literal and the
	 * identifier five tokens before the literal, e.g.
	 * <code>identifier = new Type("literal")</code>.
	 *
	 * @return An Optional holding the IToken of the identifier of the new
	 *         statement, empty optional if no identifier found.
	 */
	protected Optional<IToken> lookupIdentifierIfInNewStatement(IToken literal) {
		int literalIndex = preprocessedTokens.indexOf(literal);
		// The identifier is read at literalIndex - 5, so at least five tokens must
		// precede the literal.
		if (literalIndex < 5 || preprocessedTokens.get(literalIndex - 3).getType() != ETokenType.NEW) {
			return Optional.empty();
		}

		IToken identifier = preprocessedTokens.get(literalIndex - 5);

		if (identifier.getType() == ETokenType.IDENTIFIER) {
			return Optional.of(identifier);
		}
		return Optional.empty();
	}

	/**
	 * Gets the regular expression against which literals must match. If
	 * <code>null</code> or empty, any literal matches.
	 */
	protected abstract String getLiteralRegex();

	/**
	 * Checks if the given identifier matches.
	 */
	protected abstract boolean isMatchingIdentifier(IToken identifier);

	/**
	 * @return text for findings message
	 */
	protected abstract String getFindingsMessageText();
}