package eu.cqse.check.util.clang;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Optional;

import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.string.StringUtils;

import eu.cqse.check.CheckTextRegionLocation;
import eu.cqse.check.framework.scanner.ELanguage;
import eu.cqse.check.framework.scanner.IToken;
import eu.cqse.clang.CXChildVisitResult;
import eu.cqse.clang.CXCursor;
import eu.cqse.clang.CXCursorKind;
import eu.cqse.clang.CXSourceLocation;
import eu.cqse.clang.CXSourceRange;
import eu.cqse.clang.CXType;
import eu.cqse.clang.CXTypeKind;
import eu.cqse.clang.Clang;
import eu.cqse.clang.ClangBinding;
import eu.cqse.clang.ClangSpellingLocationProperties;
import eu.cqse.clang.SWIGTYPE_p_CXTranslationUnitImpl;
import eu.cqse.clang.SWIGTYPE_p_void;

/**
 * Utility methods for using the libclang API.
 */
public class ClangUtils {

	/**
	 * Languages for which we enable parsing with clang.
	 *
	 * NOTE(review): an {@link EnumSet} is mutable, so callers could modify this
	 * shared constant. The declared type is kept for compatibility.
	 */
	public static final EnumSet<ELanguage> CLANG_ENABLED_LANGUAGES = EnumSet.of(ELanguage.CPP, ELanguage.OBJECTIVE_C);

	/**
	 * Flags for the {@link Clang#clang_formatDiagnostic} method. Called with these
	 * flags, it will print the source location for errors and warnings including
	 * column numbers.
	 *
	 * The magic numbers come from the C enum implementation (not exported properly
	 * by swig). Implement TS-19182 for fixing the export properly.
	 * https://github.com/llvm-mirror/clang/blob/release_80/include/clang-c/Index.h#L925
	 * <ul>
	 * <li>CXDiagnosticDisplayOptions.CXDiagnostic_DisplaySourceLocation = 0x01</li>
	 * <li>CXDiagnosticDisplayOptions.CXDiagnostic_DisplayColumn = 0x02</li>
	 * </ul>
	 */
	private static final int DIAGNOSTICS_FLAGS = 0x01 | 0x02;

	/**
	 * Returns the direct children of the given cursor.
	 */
	public static List<CXCursor> getDirectChildren(CXCursor cursor) {
		List<CXCursor> children = new ArrayList<>();
		ClangBinding.visitChildren(cursor, (childCursor, parent) -> {
			children.add(childCursor);
			// "Continue" moves on to the next sibling without descending
			return CXChildVisitResult.CXChildVisit_Continue;
		});
		return children;
	}

	/**
	 * Returns all children of the given cursor (recursive) that have the given
	 * {@link CXCursorKind}.
	 */
	public static List<CXCursor> findChildrenWithKindRecursively(CXCursor cursor, CXCursorKind cursorKind) {
		List<CXCursor> matchingCursors = new ArrayList<>();
		ClangBinding.visitChildren(cursor, (childCursor, parent) -> {
			if (childCursor.getKind() == cursorKind) {
				matchingCursors.add(childCursor);
			}
			// always descend, matching cursors may be nested in each other
			return CXChildVisitResult.CXChildVisit_Recurse;
		});
		return matchingCursors;
	}

	/**
	 * Prints the diagnostics information ("errors and warnings" in Clang) generated
	 * during construction of the given translationUnit to the given
	 * {@link PrintStream}. Each diagnostic is printed on its own line and is
	 * formatted according to {@link #DIAGNOSTICS_FLAGS}.
	 *
	 * Don't delete this method even if it is unused since we need it for debugging
	 * Clang.
	 */
	@SuppressWarnings("unused")
	public static void debugPrintDiagnostics(SWIGTYPE_p_CXTranslationUnitImpl translationUnit, PrintStream out) {
		long numDiagnostics = Clang.clang_getNumDiagnostics(translationUnit);
		for (long i = 0; i < numDiagnostics; i++) {
			// NOTE(review): neither the diagnostic handle nor the formatted string is
			// released here. The libclang C API offers clang_disposeDiagnostic and
			// clang_disposeString for this; confirm whether the binding exposes them.
			SWIGTYPE_p_void diag = Clang.clang_getDiagnostic(translationUnit, i);
			out.println(Clang.clang_getCString(Clang.clang_formatDiagnostic(diag, DIAGNOSTICS_FLAGS)));
		}
	}

	/**
	 * Prints the AST of the given translation unit to System.out.
	 *
	 * Don't delete this method even if it is unused since we need it for debugging
	 * Clang.
	 */
	@SuppressWarnings("unused")
	public static void debugPrintTranslationUnitAST(SWIGTYPE_p_CXTranslationUnitImpl translationUnit) {
		CXCursor topLevelCursor = Clang.clang_getTranslationUnitCursor(translationUnit);
		ClangBinding.visitChildren(topLevelCursor, new PrintVisitor(new CursorPrinter(translationUnit), System.out));
	}

	/**
	 * Creates a {@link CheckTextRegionLocation} for the extent of the given clang
	 * cursor, if the start of the cursor's extent is actually located in the main
	 * file of the translation unit (and not in some included header).
	 *
	 * @return the location of the cursor's extent, or {@link Optional#empty()} if
	 *         the extent does not start in the main file.
	 */
	public static Optional<CheckTextRegionLocation> createTextRegionLocation(CXCursor cursor) {
		CXSourceRange findingLocationExtent = Clang.clang_getCursorExtent(cursor);
		CXSourceLocation findingLocationStart = Clang.clang_getRangeStart(findingLocationExtent);
		if (Clang.clang_Location_isFromMainFile(findingLocationStart) == 0) {
			return Optional.empty();
		}
		ClangSpellingLocationProperties start = ClangBinding.getSpellingLocationProperties(findingLocationStart);
		ClangSpellingLocationProperties end = ClangBinding
				.getSpellingLocationProperties(Clang.clang_getRangeEnd(findingLocationExtent));
		return Optional.of(new CheckTextRegionLocation(start.getFile(), start.getOffset(), end.getOffset(),
				start.getLine(), end.getLine()));
	}

	/**
	 * Returns the code from the start to the end of the given {@link CXCursor}.
	 * This assumes that the cursors' TranslationUnit was created with the given
	 * code (same
	 * {@link eu.cqse.check.framework.core.phase.ECodeViewOption.ETextViewOption}
	 * text representation). Otherwise offsets will be wrong.
	 *
	 * Line breaks have been removed from the returned string (to simplify using it
	 * in user-visible messages).
	 *
	 * Just getting the name of the current cursor works for simple cases (with
	 * <code>Clang.clang_getCString(Clang.clang_getCursorSpelling(cursor))</code>),
	 * but if child is a more complex expression, this returns the empty string.
	 * Therefore, we get the text from the entire region of the cursor here.
	 *
	 * @return the text covered by the cursor, or {@link Optional#empty()} if
	 *         {@link #createTextRegionLocation(CXCursor)} yields no location for
	 *         the cursor.
	 */
	public static Optional<String> getCompleteNodeText(CXCursor cursor, String mainFileContent) {
		return createTextRegionLocation(cursor).map(textLocation -> {
			String textAtLocation = mainFileContent.substring(textLocation.rawStartOffset, textLocation.rawEndOffset);
			return StringUtils.replaceLineBreaks(textAtLocation, "");
		});
	}

	/**
	 * Resolves the given type to its initial type definition.
	 *
	 * For example the type <code>X</code> in
	 * <code>typedef int A; typedef A B; typedef B X;</code> will be resolved to
	 * <code>int</code>.
	 */
	public static CXType resolveTypedefs(CXType type) {
		CXType resolvedType = type;
		// typedefs may be chained, so unwrap until a non-typedef type is reached
		while (resolvedType.getKind() == CXTypeKind.CXType_Typedef) {
			resolvedType = Clang.clang_getTypedefDeclUnderlyingType(Clang.clang_getTypeDeclaration(resolvedType));
		}
		return resolvedType;
	}

	/**
	 * This is a hack to get the text of a binary operator. It is necessary since
	 * our clang JNI binding does not export the Clang BinaryOperator or
	 * CompoundAssignOperator functions (e.g., clang::BinaryOperator::getOpcode).
	 *
	 * This function throws an assertion error if called with a cursor that has
	 * neither kind CXCursor_BinaryOperator nor kind
	 * CXCursor_CompoundAssignOperator. Otherwise, it tries to return the operator
	 * as String (e.g., "=" or "+"). The operator is normalized (leading/trailing
	 * whitespace is removed, linebreaks replaced by " ").
	 *
	 * The operator text is taken from the region between the end of the first child
	 * cursor and the start of the second child cursor.
	 *
	 * This won't work on cursors that were created by macros (we try to detect such
	 * situations and return an Optional.empty).
	 *
	 * @param operatorCursor
	 *            Cursor of kind {@link CXCursorKind#CXCursor_BinaryOperator} or
	 *            {@link CXCursorKind#CXCursor_CompoundAssignOperator}
	 * @param completeFileText
	 *            The text of the file used to create the current translation unit.
	 *            The operator text is extracted from this string based on the
	 *            offsets reported by clang.
	 * @return An Optional containing the text of the operator (e.g. "=" or "+") or
	 *         Optional.empty if the operator text can't be determined (e.g., macro
	 *         problems, or the cursor does not have two child cursors)
	 */
	public static Optional<String> getOperatorText(CXCursor operatorCursor, String completeFileText) {
		CCSMAssert.isTrue(isBinaryOperatorOrCompoundAssignment(operatorCursor),
				() -> "Cursor must be of kind CXCursor_BinaryOperator or CXCursor_CompoundAssignOperator to use this "
						+ "method. It was " + Clang.clang_getCursorKind(operatorCursor).toString() + " instead.");
		List<CXCursor> children = getDirectChildren(operatorCursor);
		if (children.size() < 2) {
			// without both operands there is no region between them to extract the
			// operator from
			return Optional.empty();
		}
		CXSourceLocation startLocation = Clang.clang_getRangeEnd(Clang.clang_getCursorExtent(children.get(0)));
		CXSourceLocation endLocation = Clang.clang_getRangeStart(Clang.clang_getCursorExtent(children.get(1)));
		if (Clang.clang_Location_isFromMainFile(startLocation) == 0
				|| Clang.clang_Location_isFromMainFile(endLocation) == 0) {
			return Optional.empty();
		}
		ClangSpellingLocationProperties start = ClangBinding.getSpellingLocationProperties(startLocation);
		ClangSpellingLocationProperties end = ClangBinding.getSpellingLocationProperties(endLocation);
		if (start.getOffset() >= end.getOffset()) {
			// this was likely caused by a macro expansion
			return Optional.empty();
		}

		String operatorText = completeFileText.substring(start.getOffset(), end.getOffset());
		operatorText = StringUtils.replaceLineBreaks(operatorText, " ").trim();
		return Optional.of(operatorText);
	}

	/**
	 * Returns the cursor for a given token (if any found). This is the first cursor
	 * found in the tree with the exact same start/end offset as the given token.
	 * Depending on the structure of the AST and the offsets, this is not
	 * necessarily a leaf cursor.
	 */
	public static Optional<CXCursor> getCursorForToken(IToken token, CXCursor rootCursor) {
		// the search expects an exclusive end offset, hence the "+ 1" (presumably the
		// token's end offset is inclusive; verify against IToken)
		return getCursorForStartEndOffset(token.getOffset(), token.getEndOffset() + 1, rootCursor);
	}

	/**
	 * Returns the cursor for a given start/end offset (if any found). This is the
	 * first cursor found in the tree with the exact start/end offset. Depending on
	 * the structure of the AST and the offsets, this is not necessarily a leaf
	 * cursor.
	 *
	 * @param inclusiveStartOffset
	 *            offset at which the searched cursor's extent starts
	 * @param exclusiveEndOffset
	 *            offset at which the searched cursor's extent ends
	 * @param rootCursor
	 *            cursor whose children are searched (the root itself is not a
	 *            candidate)
	 */
	public static Optional<CXCursor> getCursorForStartEndOffset(int inclusiveStartOffset, int exclusiveEndOffset,
			CXCursor rootCursor) {
		List<CXCursor> matches = new ArrayList<>();
		ClangBinding.visitChildren(rootCursor, (childCursor, parent) -> {
			CXSourceRange extent = Clang.clang_getCursorExtent(childCursor);
			try {
				int cursorStartOffset = ClangBinding.getSpellingLocationProperties(Clang.clang_getRangeStart(extent))
						.getOffset();
				int cursorEndOffset = ClangBinding.getSpellingLocationProperties(Clang.clang_getRangeEnd(extent))
						.getOffset();

				if (cursorStartOffset > inclusiveStartOffset) {
					// this cursor starts behind the searched region; abort the whole search
					// (presumably relies on cursors being visited in source order)
					return CXChildVisitResult.CXChildVisit_Break;
				}
				if (cursorEndOffset < exclusiveEndOffset) {
					// this cursor ends before the searched region ends and thus cannot
					// contain it; skip its children and try the next sibling
					return CXChildVisitResult.CXChildVisit_Continue;
				}
				if (cursorStartOffset < inclusiveStartOffset || cursorEndOffset > exclusiveEndOffset) {
					// this cursor encloses the searched region but is larger; look for an
					// exact match among its children
					return CXChildVisitResult.CXChildVisit_Recurse;
				}

				// exact match on both offsets
				matches.add(childCursor);
				return CXChildVisitResult.CXChildVisit_Break;
			} catch (RuntimeException e) {
				// this happens when we recurse into headers
				return CXChildVisitResult.CXChildVisit_Break;
			}
		});
		// the traversal stops at the first exact match, so there is at most one entry
		return matches.stream().findFirst();
	}

	/**
	 * Returns true if the given {@link CXCursor} is a
	 * {@link CXCursorKind#CXCursor_BinaryOperator} (e.g. +, -, = etc.) or a
	 * {@link CXCursorKind#CXCursor_CompoundAssignOperator} (e.g. +=, -= etc.).
	 * Since both have a left hand- and right hand side with the operator in the
	 * middle, these operators are handled together for convenience.
	 */
	public static boolean isBinaryOperatorOrCompoundAssignment(CXCursor cursor) {
		CXCursorKind cursorKind = Clang.clang_getCursorKind(cursor);
		return cursorKind == CXCursorKind.CXCursor_BinaryOperator
				|| cursorKind == CXCursorKind.CXCursor_CompoundAssignOperator;
	}
}