001/*-------------------------------------------------------------------------+ 002| | 003| Copyright 2005-2011 the ConQAT Project | 004| | 005| Licensed under the Apache License, Version 2.0 (the "License"); | 006| you may not use this file except in compliance with the License. | 007| You may obtain a copy of the License at | 008| | 009| http://www.apache.org/licenses/LICENSE-2.0 | 010| | 011| Unless required by applicable law or agreed to in writing, software | 012| distributed under the License is distributed on an "AS IS" BASIS, | 013| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014| See the License for the specific language governing permissions and | 015| limitations under the License. | 016+-------------------------------------------------------------------------*/ 017package eu.cqse.check.framework.preprocessor.c; 018 019import java.util.ArrayList; 020import java.util.List; 021import java.util.regex.Pattern; 022 023import javax.script.ScriptEngine; 024import javax.script.ScriptEngineManager; 025import javax.script.ScriptException; 026 027import org.conqat.lib.commons.cache4j.ICache; 028import org.conqat.lib.commons.cache4j.SynchronizedCache; 029import org.conqat.lib.commons.cache4j.backend.ECachingStrategy; 030import org.conqat.lib.commons.collections.CollectionUtils; 031import org.conqat.lib.commons.error.NeverThrownRuntimeException; 032import org.conqat.lib.commons.region.Region; 033import org.conqat.lib.commons.region.RegionSet; 034import org.conqat.lib.commons.string.StringUtils; 035 036import eu.cqse.check.framework.scanner.ETokenType; 037import eu.cqse.check.framework.scanner.ETokenType.ETokenClass; 038import eu.cqse.check.framework.scanner.IToken; 039import eu.cqse.check.framework.shallowparser.TokenStreamTextUtils; 040import eu.cqse.check.framework.shallowparser.TokenStreamUtils; 041 042/** 043 * Base class of the C preprocessor that deals with conditionals in addition to 044 * macro handling and the overall parsing. 045 * 046 * The design of the preprocessor uses inheritance to separate the aspects macro 047 * handling, include handling, and conditionals to separate classes. This is 048 * meant to keep the classes small and easier to understand, while still 049 * providing a final common class, that can be easily extended by subclassing 050 * and overriding certain methods, which would be hard when using 051 * delegation/composition. 052 */ 053public abstract class ConditionalHandlingCPreprocessorBase extends MacroHandlingCPreprocessorBase { 054 055 /** The script engine used for evaluating conditionals. */ 056 private static final ScriptEngine CONDITIONAL_EVALUATION_ENGINE = new ScriptEngineManager() 057 .getEngineByName("JavaScript"); 058 059 /** 060 * Caching for expressions to reduce the number of expensive evaluations. 061 */ 062 private static final ICache<String, Boolean, NeverThrownRuntimeException> EXPRESSION_CACHE = new SynchronizedCache<>( 063 "EXPRESSION_CACHE", (String expression) -> evaluateExpression(expression), 064 ECachingStrategy.LRU.<String, Boolean>getBackend(5000)); 065 066 /** Patterns used for finding endif directives. */ 067 private static final Pattern ENDIF_DIRECTIVE_START_PATTERN = Pattern.compile("^#\\s*endif"); 068 069 /** Patterns used for finding elif directives. */ 070 private static final Pattern ELIF_DIRECTIVE_START_PATTERN = Pattern.compile("^#\\s*elif"); 071 072 /** Patterns used for finding else directives. */ 073 private static final Pattern ELSE_DIRECTIVE_START_PATTERN = Pattern.compile("^#\\s*else"); 074 075 /** Constructor. */ 076 protected ConditionalHandlingCPreprocessorBase(IMacroProvider macroProvider) { 077 super(macroProvider); 078 } 079 080 /** Returns whether the given token is an endif directive. */ 081 private static boolean isEndIfDirective(IToken token) { 082 return ENDIF_DIRECTIVE_START_PATTERN.matcher(token.getText()).find(); 083 } 084 085 /** Returns whether the given token is an elif directive. */ 086 private static boolean isElifDirective(IToken token) { 087 return ELIF_DIRECTIVE_START_PATTERN.matcher(token.getText()).find(); 088 } 089 090 /** Returns whether the given token is an else directive. */ 091 private static boolean isElseDirective(IToken token) { 092 return ELSE_DIRECTIVE_START_PATTERN.matcher(token.getText()).find(); 093 } 094 095 /** {@inheritDoc} */ 096 @Override 097 protected void processIfDirective(List<IToken> tokens, int ifIndex, RegionSet ignoredRegions) { 098 List<IfRegionDescriptor> ifRegions = new ArrayList<>(); 099 ifRegions.add(new IfRegionDescriptor(extractCondition(tokens.get(ifIndex)), ifIndex)); 100 int nestingCount = 0; 101 for (int i = ifIndex + 1; i < tokens.size(); ++i) { 102 IToken currentToken = tokens.get(i); 103 if (isIfDirective(currentToken)) { 104 nestingCount += 1; 105 } 106 107 if (nestingCount > 0) { 108 if (isEndIfDirective(currentToken)) { 109 nestingCount -= 1; 110 } 111 continue; 112 } 113 114 if (isEndIfDirective(currentToken)) { 115 CollectionUtils.getLast(ifRegions).closeRegion(i); 116 decideRegionInclusion(ifRegions); 117 applyIfRegions(ifRegions, ignoredRegions); 118 return; 119 } else if (isElseDirective(currentToken)) { 120 CollectionUtils.getLast(ifRegions).closeRegion(i); 121 ifRegions.add(new IfRegionDescriptor("1", i)); 122 } else if (isElifDirective(currentToken)) { 123 CollectionUtils.getLast(ifRegions).closeRegion(i); 124 ifRegions.add(new IfRegionDescriptor(extractCondition(tokens.get(i)), i)); 125 } 126 } 127 128 // if we reached this, there is a dangling region at the end 129 CollectionUtils.getLast(ifRegions).closeRegion(tokens.size() - 1); 130 } 131 132 /** 133 * Extracts and returns the condition for an if/ifdef/ifndef/elif directive. 134 */ 135 private static String extractCondition(IToken token) { 136 // strip leading '#' 137 String content = token.getText().trim().substring(1); 138 List<IToken> subTokens = parseMacroContent(content); 139 140 if (subTokens.size() == 2 && subTokens.get(0).getText().equals("ifdef")) { 141 return "defined(" + subTokens.get(1).getText() + ")"; 142 } 143 144 if (subTokens.size() == 2 && subTokens.get(0).getText().equals("ifndef")) { 145 return "!defined(" + subTokens.get(1).getText() + ")"; 146 } 147 148 return TokenStreamTextUtils.concatTokenTexts(subTokens.subList(1, subTokens.size()), StringUtils.SPACE); 149 } 150 151 /** 152 * Decides for each ifRegion, whether it should be included or not. The default 153 * implementation does this by checking each region's condition and selecting 154 * the first region with a true condition. Sub classes may change the behavior, 155 * by including one or multiple regions based on other criteria. 156 * 157 * @param ifRegions 158 * the regions to decide for. The inclusion of the regions is 159 * initially false and must be updated, if a region should be 160 * included. 161 */ 162 protected void decideRegionInclusion(List<IfRegionDescriptor> ifRegions) { 163 for (IfRegionDescriptor ifRegion : ifRegions) { 164 if (conditionIsTrue(ifRegion.getCondition())) { 165 ifRegion.setInclude(true); 166 return; 167 } 168 } 169 } 170 171 /** Evaluates an if condition and returns its boolean value. */ 172 protected boolean conditionIsTrue(String condition) { 173 List<IToken> conditionTokens = parseMacroContent(condition); 174 conditionTokens = expandDefined(conditionTokens); 175 176 // expand all macros 177 conditionTokens = preprocess(null, conditionTokens); 178 179 // if there are still identifiers left, we are missing values for them 180 if (conditionTokens.isEmpty() || TokenStreamUtils.containsAny(conditionTokens, ETokenType.IDENTIFIER)) { 181 return false; 182 } 183 184 if (conditionTokens.size() == 1 && conditionTokens.get(0).getType() == ETokenType.INTEGER_LITERAL) { 185 return Integer.parseInt(conditionTokens.get(0).getText().replaceAll("[^0-9]", "")) != 0; 186 } 187 188 String expression = TokenStreamTextUtils.concatTokenTexts(CollectionUtils.filter(conditionTokens, 189 token -> token.getType().getTokenClass() != ETokenClass.COMMENT), StringUtils.SPACE); 190 expression = stripNumberSuffixes(expression); 191 return EXPRESSION_CACHE.obtain(expression); 192 } 193 194 /** 195 * Removes number suffixes for size and signedness (u, s, l, etc.) from decimal 196 * and hexadecimal numbers. 197 */ 198 private static String stripNumberSuffixes(String expression) { 199 return expression.replaceAll("(\\d|[a-fA-F])[uUsSlL]{1,2}", "$1"); 200 } 201 202 /** Expands "defined" statements. */ 203 private List<IToken> expandDefined(List<IToken> tokens) { 204 List<IToken> result = new ArrayList<>(); 205 for (int i = 0; i < tokens.size(); ++i) { 206 IToken token = tokens.get(i); 207 if (token.getType() == ETokenType.IDENTIFIER && token.getText().equals("defined")) { 208 if (i + 3 >= tokens.size() || !TokenStreamUtils.hasTokenTypeSequence(tokens, i + 1, ETokenType.LPAREN, 209 ETokenType.IDENTIFIER, ETokenType.RPAREN)) { 210 // broken define; break here 211 return result; 212 } 213 214 String macroName = tokens.get(i + 2).getText(); 215 String value = "0"; 216 if (macroProvider.isDefined(macroName)) { 217 value = "1"; 218 } 219 result.add(token.newToken(ETokenType.INTEGER_LITERAL, token.getOffset(), token.getLineNumber(), value, 220 token.getOriginId())); 221 222 // consume additional tokens 223 i += 3; 224 } else { 225 result.add(token); 226 } 227 } 228 return result; 229 } 230 231 /** 232 * Applies the inclusion state of the given if regions by updating the ignored 233 * regions. 234 */ 235 private static void applyIfRegions(List<IfRegionDescriptor> ifRegions, RegionSet ignoredRegions) { 236 for (IfRegionDescriptor ifRegion : ifRegions) { 237 if (ifRegion.isInclude()) { 238 ignoredRegions.add(new Region(ifRegion.getStartIndex(), ifRegion.getStartIndex())); 239 ignoredRegions.add(new Region(ifRegion.getEndIndex(), ifRegion.getEndIndex())); 240 } else { 241 ignoredRegions.add(new Region(ifRegion.getStartIndex(), ifRegion.getEndIndex())); 242 } 243 } 244 } 245 246 /** Evaluates a boolean expression. */ 247 private static Boolean evaluateExpression(String expression) { 248 try { 249 Object result; 250 251 // the remainder at this point is a boolean or 252 // arithmetic expression, which is compatible with 253 // JavaScript, so we use this as parser 254 synchronized (CONDITIONAL_EVALUATION_ENGINE) { 255 result = CONDITIONAL_EVALUATION_ENGINE.eval(expression); 256 } 257 258 if (result instanceof Boolean) { 259 return (Boolean) result; 260 } 261 if (result instanceof Integer) { 262 return 0 != (Integer) result; 263 } 264 return false; 265 } catch (ScriptException e) { 266 // not parseable 267 return false; 268 } 269 } 270}