001/*-------------------------------------------------------------------------+
002|                                                                          |
0033| Copyright (c) 2009-2018 CQSE GmbH                                        |
004|                                                                          |
005+-------------------------------------------------------------------------*/
006package eu.cqse.check.base;
007
008import java.util.EnumSet;
009import java.util.List;
010import java.util.Optional;
011import java.util.regex.Pattern;
012import java.util.stream.Collectors;
013
014import org.conqat.lib.commons.string.StringUtils;
015
016import eu.cqse.check.framework.core.CheckException;
017import eu.cqse.check.framework.core.CheckImplementationBase;
018import eu.cqse.check.framework.core.ECheckParameter;
019import eu.cqse.check.framework.core.phase.ECodeViewOption;
020import eu.cqse.check.framework.core.util.CheckUtils;
021import eu.cqse.check.framework.scanner.ETokenType;
022import eu.cqse.check.framework.scanner.IToken;
023import eu.cqse.check.framework.shallowparser.framework.ShallowEntity;
024
025/**
026 * Base class for checks on hard-coded character literals. Subclasses check
027 * literals which match {@link #getLiteralRegex()} (if specified) and which
028 * occur in context with an identifier (e.g. assignment, comparison). The
029 * identifier is checked with {@link #isMatchingIdentifier(IToken)}.
030 *
031 * {@link #findAllMatchingLiteral()} can be overwritten if all literals matching
032 * {@link #getLiteralRegex()} should produce a finding, regardless of the
033 * context where these occur.
034 *
035 * The checks works on pre-processed tokens, thus implementing checks must have
036 * the parameter {@link ECheckParameter#ABSTRACT_SYNTAX_TREE} enabled.
037 */
038public abstract class HardCodedLiteralsCheckBase extends CheckImplementationBase {
039
040        /**
041         * ABAP Specific field.
042         * 
043         * The four boolean operator token types.
044         */
045        private static final EnumSet<ETokenType> BOOLEAN_OPERATORS = EnumSet.of(ETokenType.AND, ETokenType.OR,
046                        ETokenType.NOT, ETokenType.EQUIV);
047
048        /**
049         * ABAP Specific field.
050         * 
051         * Token types of the keywords which introduce data definitions where literals
052         * may be specified using the VALUE addition
053         */
054        private static final EnumSet<ETokenType> DATA_VALUE_DEFINITIONS = EnumSet.of(ETokenType.CLASS_DATA,
055                        ETokenType.CONSTANTS, ETokenType.DATA, ETokenType.STATICS, ETokenType.TYPES);
056
057        /** Tokens of the current element, but without comments */
058        protected List<IToken> preprocessedTokens;
059
060        /**
061         * Pattern against literals must match, if <code>null</code> any literal
062         * matches.
063         */
064        private Pattern literalPattern;
065
066        /** {@inheritDoc} */
067        @Override
068        public void execute() throws CheckException {
069                buildLiteralPattern();
070
071                // pre-processed tokens only available from AST
072                for (ShallowEntity entity : context.getAbstractSyntaxTree(ECodeViewOption.FILTERED_PREPROCESSED)) {
073
074                        preprocessedTokens = entity.includedTokens();
075
076                        List<IToken> characterLiterals = preprocessedTokens.stream()
077                                        .filter(token -> CheckUtils.STRING_LITERALS.contains(token.getType())).collect(Collectors.toList());
078
079                        for (IToken literal : characterLiterals) {
080                                checkCharacterLiteral(literal);
081                        }
082                }
083        }
084
085        /**
086         * Builds {@link #literalPattern} from {@link #getLiteralRegex()}, if pattern
087         * can not be build, it is set to <code>null</code>.
088         */
089        private void buildLiteralPattern() {
090                String regex = getLiteralRegex();
091                if (StringUtils.isEmpty(regex)) {
092                        literalPattern = null;
093                        return;
094                }
095                literalPattern = Pattern.compile(regex);
096        }
097
098        /**
099         * Checks the given token which should be a character/string literal.
100         */
101        private void checkCharacterLiteral(IToken literal) throws CheckException {
102                if (!isMatchingLiteral(literal)) {
103                        return;
104                }
105
106                if (findAllMatchingLiteral()) {
107                        createFindingForLiteral(literal);
108                        return;
109                }
110
111                Optional<IToken> identifier = lookupIdentifier(literal);
112
113                if (identifier.isPresent() && isMatchingIdentifier(identifier.get())) {
114                        createFindingForLiteral(literal);
115                }
116        }
117
118        /**
119         * Lookup for the identifier corresponding to the given literal. Delegates to
120         * the language specific lookup and performs generic lookup functions.
121         * 
122         * @return An Optional holding the IToken of the identifier, empty optional if
123         *         no identifier found.
124         */
125        private Optional<IToken> lookupIdentifier(IToken literal) throws CheckException {
126                Optional<IToken> identifier = languageSpecificLookup(literal);
127
128                if (!identifier.isPresent()) {
129                        identifier = lookupIdentifierIfInOperation(literal, true);
130                }
131                if (!identifier.isPresent()) {
132                        identifier = lookupIdentifierIfInOperation(literal, false);
133                }
134
135                return identifier;
136        }
137
138        /**
139         * Differentiates between languages. Depending on the ELanguage of the literal,
140         * the language specific lookup function gets called.
141         * 
142         * @return An Optional holding the IToken of the identifier, empty optional if
143         *         no identifier found.
144         */
145        private Optional<IToken> languageSpecificLookup(IToken literal) throws CheckException {
146                Optional<IToken> identifier;
147                /**
148                 * The fall-through in this switch statement is intended to explicitly state
149                 * which languages are supported and which methods correspond to which language.
150                 */
151                switch (literal.getLanguage()) {
152                case ABAP:
153                        identifier = lookupIdentifierIfInCaseWhen(literal);
154                        if (identifier.isPresent()) {
155                                return identifier;
156                        }
157                case VB:
158                        return lookupIdentifierIfInDefinition(literal);
159                case CS:
160                        identifier = lookupIdentifierIfInProperty(literal);
161                        if (identifier.isPresent()) {
162                                return identifier;
163                        }
164                case JAVA:
165                case CPP:
166                case GOSU:
167                case JAVASCRIPT:
168                        return lookupIdentifierIfInNewStatement(literal);
169                case KOTLIN:
170                case GROOVY:
171                case XTEND:
172                case SWIFT:
173                case PHP:
174                case PYTHON:
175                case RUST:
176                        return Optional.empty();
177                default:
178                        throw new CheckException("Language " + literal.getLanguage() + " of " + literal + " not supported.");
179                }
180        }
181
182        /**
183         * Creates a finding for the given literal
184         */
185        private void createFindingForLiteral(IToken literal) throws CheckException {
186                // Use curly quotation marks for better markdown support.
187                createFinding(getFindingsMessageText() + " " + literal.getText().replaceAll("'", "`"), literal);
188        }
189
190        /**
191         * @return <code>true</code> if the literal matches and should be checked
192         */
193        protected boolean isMatchingLiteral(IToken literal) {
194                if (literalPattern == null) {
195                        return true;
196                }
197                return literalPattern.matcher(CheckUtils.getUnquotedTextForCharacterLiteral(literal)).matches();
198        }
199
200        /**
201         * Indicates if any literal matching {@link #getLiteralRegex()} should be
202         * reported. Default implementation is <code>false</code>.
203         */
204        protected boolean findAllMatchingLiteral() {
205                return false;
206        }
207
208        /**
209         * Performs a lookup for the identifier, if the given literal is stated within
210         * an operation where on the other side of the operator an identifier is stated
211         *
212         * @param leftHandLookup
213         *            determines if identifier should be search on the left side,
214         *            otherwise it is expected on the right side of the operator.
215         * @return An Optional holding the IToken of the identifier, empty optional if
216         *         no identifier found.
217         */
218        protected Optional<IToken> lookupIdentifierIfInOperation(IToken literal, boolean leftHandLookup) {
219                int lookupDirection = 1;
220                if (leftHandLookup) {
221                        lookupDirection = -1;
222                }
223                int literalIndex = preprocessedTokens.indexOf(literal);
224
225                int operatorIndex = literalIndex + lookupDirection;
226                int identifierIndex = operatorIndex + lookupDirection;
227                if (isFieldSymbolAt(identifierIndex)) {
228                        identifierIndex += lookupDirection;
229                }
230
231                if (identifierIndex >= 0 && identifierIndex < preprocessedTokens.size()
232                                && isOperatorOrLParan(preprocessedTokens.get(operatorIndex))
233                                && preprocessedTokens.get(identifierIndex).getType().isIdentifier()) {
234                        return Optional.of(preprocessedTokens.get(identifierIndex));
235                }
236
237                if (leftHandLookup && identifierIndex >= 2) {
238                        identifierIndex -= 2;
239
240                        if (preprocessedTokens.get(identifierIndex).getType().isIdentifier()) {
241                                return Optional.of(preprocessedTokens.get(identifierIndex));
242                        }
243                }
244
245                return Optional.empty();
246        }
247
248        /**
249         * ABAP Specific function
250         * 
251         * Checks if the given token is either an operator (but not a boolean operator)
252         * or a left parenthesis. The check for the left parenthesis is added to match
253         * functional method calls with only one unnamed parameter. Then the method name
254         * is used as identifier.
255         */
256        private static boolean isOperatorOrLParan(IToken token) {
257                ETokenType tokenType = token.getType();
258                return (!BOOLEAN_OPERATORS.contains(tokenType) && tokenType.isOperator()) || tokenType == ETokenType.LPAREN;
259        }
260
261        /**
262         * ABAP Specific function
263         * 
264         * Checks if the literal at the given pos in {@link #preprocessedTokens} is a
265         * field symbol, e.g. enclosed in <..>
266         */
267        private boolean isFieldSymbolAt(int pos) {
268                if (pos < 1 || pos >= preprocessedTokens.size() - 1) {
269                        return false;
270                }
271                IToken token = preprocessedTokens.get(pos);
272                if (token.getType() == ETokenType.LT) {
273                        pos++;
274                } else if (token.getType() == ETokenType.GT) {
275                        pos--;
276                }
277                IToken tokenBefore = preprocessedTokens.get(pos - 1);
278                IToken tokenAfter = preprocessedTokens.get(pos + 1);
279                return tokenBefore.getType() == ETokenType.LT && tokenAfter.getType() == ETokenType.GT;
280        }
281
282        /**
283         * ABAP Specific function
284         * 
285         * Performs a lookup for the identifier if the given literal occurs as an WHEN
286         * option inside a CASE/WHEN structure.
287         *
288         * @return An Optional holding the IToken of the identifier which controls the
289         *         CASE/WHEN structure, empty optional if no identifier found.
290         */
291        protected Optional<IToken> lookupIdentifierIfInCaseWhen(IToken literal) {
292                int literalIndex = preprocessedTokens.indexOf(literal);
293                if (literalIndex < 1 || preprocessedTokens.get(literalIndex - 1).getType() != ETokenType.WHEN) {
294                        return Optional.empty();
295                }
296
297                for (int currentIndex = literalIndex - 1; currentIndex >= 0; currentIndex--) {
298                        if (preprocessedTokens.get(currentIndex).getType() == ETokenType.CASE) {
299                                IToken caseControlIdentifier = preprocessedTokens.get(currentIndex + 1);
300                                if (caseControlIdentifier.getType() == ETokenType.LOWER) {
301                                        // field symbol: skip '<' or '>' token
302                                        caseControlIdentifier = preprocessedTokens.get(currentIndex + 2);
303                                }
304                                if (caseControlIdentifier.getType().isIdentifier()) {
305                                        return Optional.of(caseControlIdentifier);
306                                }
307                                return Optional.empty();
308                        }
309                }
310                return Optional.empty();
311        }
312
313        /**
314         * ABAP Specific function
315         * 
316         * Performs a lookup for identifier when a literal is assigned with the VALUE
317         * addition.
318         *
319         * @return An Optional holding the IToken of the identifier, empty optional if
320         *         no identifier found.
321         */
322        protected Optional<IToken> lookupIdentifierIfInDefinition(IToken literal) {
323                int literalIndex = preprocessedTokens.indexOf(literal);
324                if (literalIndex < 1 || preprocessedTokens.get(literalIndex - 1).getType() != ETokenType.VALUE) {
325                        return Optional.empty();
326                }
327                for (int currentIndex = literalIndex - 1; currentIndex >= 0; currentIndex--) {
328                        ETokenType currentTokenType = preprocessedTokens.get(currentIndex).getType();
329
330                        boolean isStartOfStatement = currentIndex == 0
331                                        || preprocessedTokens.get(currentIndex - 1).getType() == ETokenType.DOT;
332
333                        if (isStartOfStatement && DATA_VALUE_DEFINITIONS.contains(currentTokenType)) {
334                                Optional<IToken> identifier = Optional.of(preprocessedTokens.get(currentIndex + 1));
335                                if (identifier.isPresent()) {
336                                        return identifier;
337                                }
338                        }
339                        if (currentTokenType == ETokenType.DOT) {
340                                return Optional.empty();
341                        }
342                }
343                return Optional.empty();
344        }
345
346        /**
347         * C# Specific function
348         * 
349         * Performs a lookup for the identifier if the given literal occurs as an
350         * PROPERTY.
351         *
352         * @return An Optional holding the IToken of the identifier of the property,
353         *         empty optional if no identifier found.
354         */
355        protected Optional<IToken> lookupIdentifierIfInProperty(IToken literal) {
356                int literalIndex = preprocessedTokens.indexOf(literal);
357                if (literalIndex < 1 || preprocessedTokens.get(literalIndex - 1).getType() != ETokenType.RETURN) {
358                        return Optional.empty();
359                }
360
361                for (int currentIndex = literalIndex - 1; currentIndex >= 0; currentIndex--) {
362                        ETokenType currentTokenType = preprocessedTokens.get(currentIndex).getType();
363
364                        if (currentTokenType.equals(ETokenType.GET)) {
365                                Optional<IToken> identifier = Optional.of(preprocessedTokens.get(currentIndex - 2));
366                                if (identifier.isPresent()) {
367                                        return identifier;
368                                }
369                        }
370                }
371                return Optional.empty();
372        }
373
374        /**
375         * CLike Specific function
376         * 
377         * Performs a lookup for the identifier if the given literal occurs as in a NEW
378         * statement.
379         *
380         * @return An Optional holding the IToken of the identifier of the new
381         *         statement, empty optional if no identifier found.
382         */
383        protected Optional<IToken> lookupIdentifierIfInNewStatement(IToken literal) {
384                int literalIndex = preprocessedTokens.indexOf(literal);
385                if (literalIndex < 4 || preprocessedTokens.get(literalIndex - 3).getType() != ETokenType.NEW) {
386                        return Optional.empty();
387                }
388
389                IToken identifier = preprocessedTokens.get(literalIndex - 5);
390
391                if (identifier.getType() == ETokenType.IDENTIFIER) {
392                        return Optional.of(identifier);
393                }
394                return Optional.empty();
395        }
396
397        /**
398         * Gets the regular expression against which literals must match. If
399         * <code>null</code> any literal matches.
400         */
401        protected abstract String getLiteralRegex();
402
403        /**
404         * Checks if the given identifier matches
405         */
406        protected abstract boolean isMatchingIdentifier(IToken identifier);
407
408        /**
409         * @return text for findings message
410         */
411        protected abstract String getFindingsMessageText();
412}