001/*-------------------------------------------------------------------------+
002|                                                                          |
003| Copyright 2005-2011 the ConQAT Project                                   |
004|                                                                          |
005| Licensed under the Apache License, Version 2.0 (the "License");          |
006| you may not use this file except in compliance with the License.         |
007| You may obtain a copy of the License at                                  |
008|                                                                          |
009|    http://www.apache.org/licenses/LICENSE-2.0                            |
010|                                                                          |
011| Unless required by applicable law or agreed to in writing, software      |
012| distributed under the License is distributed on an "AS IS" BASIS,        |
013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014| See the License for the specific language governing permissions and      |
015| limitations under the License.                                           |
016+-------------------------------------------------------------------------*/
017package eu.cqse.check.framework.preprocessor.c;
018
019import java.util.ArrayList;
020import java.util.List;
021import java.util.regex.Pattern;
022
023import javax.script.ScriptEngine;
024import javax.script.ScriptEngineManager;
025import javax.script.ScriptException;
026
027import org.conqat.lib.commons.cache4j.ICache;
028import org.conqat.lib.commons.cache4j.SynchronizedCache;
029import org.conqat.lib.commons.cache4j.backend.ECachingStrategy;
030import org.conqat.lib.commons.collections.CollectionUtils;
031import org.conqat.lib.commons.error.NeverThrownRuntimeException;
032import org.conqat.lib.commons.region.Region;
033import org.conqat.lib.commons.region.RegionSet;
034import org.conqat.lib.commons.string.StringUtils;
035
036import eu.cqse.check.framework.scanner.ETokenType;
037import eu.cqse.check.framework.scanner.ETokenType.ETokenClass;
038import eu.cqse.check.framework.scanner.IToken;
039import eu.cqse.check.framework.shallowparser.TokenStreamTextUtils;
040import eu.cqse.check.framework.shallowparser.TokenStreamUtils;
041
/**
 * Base class of the C preprocessor that deals with conditionals in addition to
 * macro handling and the overall parsing.
 * 
 * The design of the preprocessor uses inheritance to separate the aspects of
 * macro handling, include handling, and conditionals into separate classes.
 * This is meant to keep the classes small and easier to understand, while
 * still providing a final common class that can be easily extended by
 * subclassing and overriding certain methods, which would be hard when using
 * delegation/composition.
 */
053public abstract class ConditionalHandlingCPreprocessorBase extends MacroHandlingCPreprocessorBase {
054
        /**
         * The script engine used for evaluating conditionals. The single instance
         * is shared by all preprocessors; {@link #evaluateExpression(String)}
         * synchronizes on it for every evaluation.
         */
        private static final ScriptEngine CONDITIONAL_EVALUATION_ENGINE = new ScriptEngineManager()
                        .getEngineByName("JavaScript");

        /**
         * Caching for expressions to reduce the number of expensive evaluations.
         * The cache maps the text of an expression to the result of
         * {@link #evaluateExpression(String)} and uses an LRU backend created with
         * the parameter 5000 (presumably the maximal number of cached entries).
         */
        private static final ICache<String, Boolean, NeverThrownRuntimeException> EXPRESSION_CACHE = new SynchronizedCache<>(
                        "EXPRESSION_CACHE", (String expression) -> evaluateExpression(expression),
                        ECachingStrategy.LRU.<String, Boolean>getBackend(5000));

        /**
         * Pattern used for finding endif directives: a '#' at the start of the
         * text, followed by optional whitespace and "endif".
         */
        private static final Pattern ENDIF_DIRECTIVE_START_PATTERN = Pattern.compile("^#\\s*endif");

        /**
         * Pattern used for finding elif directives: a '#' at the start of the
         * text, followed by optional whitespace and "elif".
         */
        private static final Pattern ELIF_DIRECTIVE_START_PATTERN = Pattern.compile("^#\\s*elif");

        /**
         * Pattern used for finding else directives: a '#' at the start of the
         * text, followed by optional whitespace and "else".
         */
        private static final Pattern ELSE_DIRECTIVE_START_PATTERN = Pattern.compile("^#\\s*else");
074
        /**
         * Constructor.
         * 
         * @param macroProvider
         *            the macro provider, which is passed on unchanged to the
         *            constructor of the super class.
         */
        protected ConditionalHandlingCPreprocessorBase(IMacroProvider macroProvider) {
                super(macroProvider);
        }
079
080        /** Returns whether the given token is an endif directive. */
081        private static boolean isEndIfDirective(IToken token) {
082                return ENDIF_DIRECTIVE_START_PATTERN.matcher(token.getText()).find();
083        }
084
085        /** Returns whether the given token is an elif directive. */
086        private static boolean isElifDirective(IToken token) {
087                return ELIF_DIRECTIVE_START_PATTERN.matcher(token.getText()).find();
088        }
089
090        /** Returns whether the given token is an else directive. */
091        private static boolean isElseDirective(IToken token) {
092                return ELSE_DIRECTIVE_START_PATTERN.matcher(token.getText()).find();
093        }
094
095        /** {@inheritDoc} */
096        @Override
097        protected void processIfDirective(List<IToken> tokens, int ifIndex, RegionSet ignoredRegions) {
098                List<IfRegionDescriptor> ifRegions = new ArrayList<>();
099                ifRegions.add(new IfRegionDescriptor(extractCondition(tokens.get(ifIndex)), ifIndex));
100                int nestingCount = 0;
101                for (int i = ifIndex + 1; i < tokens.size(); ++i) {
102                        IToken currentToken = tokens.get(i);
103                        if (isIfDirective(currentToken)) {
104                                nestingCount += 1;
105                        }
106
107                        if (nestingCount > 0) {
108                                if (isEndIfDirective(currentToken)) {
109                                        nestingCount -= 1;
110                                }
111                                continue;
112                        }
113
114                        if (isEndIfDirective(currentToken)) {
115                                CollectionUtils.getLast(ifRegions).closeRegion(i);
116                                decideRegionInclusion(ifRegions);
117                                applyIfRegions(ifRegions, ignoredRegions);
118                                return;
119                        } else if (isElseDirective(currentToken)) {
120                                CollectionUtils.getLast(ifRegions).closeRegion(i);
121                                ifRegions.add(new IfRegionDescriptor("1", i));
122                        } else if (isElifDirective(currentToken)) {
123                                CollectionUtils.getLast(ifRegions).closeRegion(i);
124                                ifRegions.add(new IfRegionDescriptor(extractCondition(tokens.get(i)), i));
125                        }
126                }
127
128                // if we reached this, there is a dangling region at the end
129                CollectionUtils.getLast(ifRegions).closeRegion(tokens.size() - 1);
130        }
131
132        /**
133         * Extracts and returns the condition for an if/ifdef/ifndef/elif directive.
134         */
135        private static String extractCondition(IToken token) {
136                // strip leading '#'
137                String content = token.getText().trim().substring(1);
138                List<IToken> subTokens = parseMacroContent(content);
139
140                if (subTokens.size() == 2 && subTokens.get(0).getText().equals("ifdef")) {
141                        return "defined(" + subTokens.get(1).getText() + ")";
142                }
143
144                if (subTokens.size() == 2 && subTokens.get(0).getText().equals("ifndef")) {
145                        return "!defined(" + subTokens.get(1).getText() + ")";
146                }
147
148                return TokenStreamTextUtils.concatTokenTexts(subTokens.subList(1, subTokens.size()), StringUtils.SPACE);
149        }
150
151        /**
152         * Decides for each ifRegion, whether it should be included or not. The default
153         * implementation does this by checking each region's condition and selecting
154         * the first region with a true condition. Sub classes may change the behavior,
155         * by including one or multiple regions based on other criteria.
156         * 
157         * @param ifRegions
158         *            the regions to decide for. The inclusion of the regions is
159         *            initially false and must be updated, if a region should be
160         *            included.
161         */
162        protected void decideRegionInclusion(List<IfRegionDescriptor> ifRegions) {
163                for (IfRegionDescriptor ifRegion : ifRegions) {
164                        if (conditionIsTrue(ifRegion.getCondition())) {
165                                ifRegion.setInclude(true);
166                                return;
167                        }
168                }
169        }
170
171        /** Evaluates an if condition and returns its boolean value. */
172        protected boolean conditionIsTrue(String condition) {
173                List<IToken> conditionTokens = parseMacroContent(condition);
174                conditionTokens = expandDefined(conditionTokens);
175
176                // expand all macros
177                conditionTokens = preprocess(null, conditionTokens);
178
179                // if there are still identifiers left, we are missing values for them
180                if (conditionTokens.isEmpty() || TokenStreamUtils.containsAny(conditionTokens, ETokenType.IDENTIFIER)) {
181                        return false;
182                }
183
184                if (conditionTokens.size() == 1 && conditionTokens.get(0).getType() == ETokenType.INTEGER_LITERAL) {
185                        return Integer.parseInt(conditionTokens.get(0).getText().replaceAll("[^0-9]", "")) != 0;
186                }
187
188                String expression = TokenStreamTextUtils.concatTokenTexts(CollectionUtils.filter(conditionTokens,
189                                token -> token.getType().getTokenClass() != ETokenClass.COMMENT), StringUtils.SPACE);
190                expression = stripNumberSuffixes(expression);
191                return EXPRESSION_CACHE.obtain(expression);
192        }
193
194        /**
195         * Removes number suffixes for size and signedness (u, s, l, etc.) from decimal
196         * and hexadecimal numbers.
197         */
198        private static String stripNumberSuffixes(String expression) {
199                return expression.replaceAll("(\\d|[a-fA-F])[uUsSlL]{1,2}", "$1");
200        }
201
202        /** Expands "defined" statements. */
203        private List<IToken> expandDefined(List<IToken> tokens) {
204                List<IToken> result = new ArrayList<>();
205                for (int i = 0; i < tokens.size(); ++i) {
206                        IToken token = tokens.get(i);
207                        if (token.getType() == ETokenType.IDENTIFIER && token.getText().equals("defined")) {
208                                if (i + 3 >= tokens.size() || !TokenStreamUtils.hasTokenTypeSequence(tokens, i + 1, ETokenType.LPAREN,
209                                                ETokenType.IDENTIFIER, ETokenType.RPAREN)) {
210                                        // broken define; break here
211                                        return result;
212                                }
213
214                                String macroName = tokens.get(i + 2).getText();
215                                String value = "0";
216                                if (macroProvider.isDefined(macroName)) {
217                                        value = "1";
218                                }
219                                result.add(token.newToken(ETokenType.INTEGER_LITERAL, token.getOffset(), token.getLineNumber(), value,
220                                                token.getOriginId()));
221
222                                // consume additional tokens
223                                i += 3;
224                        } else {
225                                result.add(token);
226                        }
227                }
228                return result;
229        }
230
231        /**
232         * Applies the inclusion state of the given if regions by updating the ignored
233         * regions.
234         */
235        private static void applyIfRegions(List<IfRegionDescriptor> ifRegions, RegionSet ignoredRegions) {
236                for (IfRegionDescriptor ifRegion : ifRegions) {
237                        if (ifRegion.isInclude()) {
238                                ignoredRegions.add(new Region(ifRegion.getStartIndex(), ifRegion.getStartIndex()));
239                                ignoredRegions.add(new Region(ifRegion.getEndIndex(), ifRegion.getEndIndex()));
240                        } else {
241                                ignoredRegions.add(new Region(ifRegion.getStartIndex(), ifRegion.getEndIndex()));
242                        }
243                }
244        }
245
246        /** Evaluates a boolean expression. */
247        private static Boolean evaluateExpression(String expression) {
248                try {
249                        Object result;
250
251                        // the remainder at this point is a boolean or
252                        // arithmetic expression, which is compatible with
253                        // JavaScript, so we use this as parser
254                        synchronized (CONDITIONAL_EVALUATION_ENGINE) {
255                                result = CONDITIONAL_EVALUATION_ENGINE.eval(expression);
256                        }
257
258                        if (result instanceof Boolean) {
259                                return (Boolean) result;
260                        }
261                        if (result instanceof Integer) {
262                                return 0 != (Integer) result;
263                        }
264                        return false;
265                } catch (ScriptException e) {
266                        // not parseable
267                        return false;
268                }
269        }
270}