001package eu.cqse.check.util.clang;
002
003import java.io.PrintStream;
004import java.util.ArrayList;
005import java.util.EnumSet;
006import java.util.List;
007import java.util.Optional;
008
009import org.conqat.lib.commons.assertion.CCSMAssert;
010import org.conqat.lib.commons.string.StringUtils;
011
012import eu.cqse.check.CheckTextRegionLocation;
013import eu.cqse.check.framework.scanner.ELanguage;
014import eu.cqse.check.framework.scanner.IToken;
015import eu.cqse.clang.CXChildVisitResult;
016import eu.cqse.clang.CXCursor;
017import eu.cqse.clang.CXCursorKind;
018import eu.cqse.clang.CXSourceLocation;
019import eu.cqse.clang.CXSourceRange;
020import eu.cqse.clang.CXType;
021import eu.cqse.clang.CXTypeKind;
022import eu.cqse.clang.Clang;
023import eu.cqse.clang.ClangBinding;
024import eu.cqse.clang.ClangSpellingLocationProperties;
025import eu.cqse.clang.SWIGTYPE_p_CXTranslationUnitImpl;
026import eu.cqse.clang.SWIGTYPE_p_void;
027
028/**
029 * Util methods for using the libclang API
030 */
031public class ClangUtils {
032
033        /**
034         * Languages for which we enable parsing with clang.
035         */
036        public static final EnumSet<ELanguage> CLANG_ENABLED_LANGUAGES = EnumSet.of(ELanguage.CPP, ELanguage.OBJECTIVE_C);
037
038        /*
039         * Flags for the {@link Clang#clang_getDiagnostic} method. Called with this
040         * flag, it will print the source location for errors and warnings including
041         * column numbers.
042         * 
043         * The magic numbers come from the C enum implementation (not exported properly
044         * by swig). Implement TS-19182 for fixing the export properly.
045         * https://github.com/llvm-mirror/clang/blob/release_80/include/clang-c/Index.h#
046         * L925 CXDiagnosticDisplayOptions.CXDiagnostic_DisplaySourceLocation = 0x01
047         * CXDiagnosticDisplayOptions.CXDiagnostic_DisplayColumn = 0x02
048         */
049        private static final int DIAGNOSTICS_FLAGS = 0x01 | 0x02;
050
051        /**
052         * Returns the direct children of the given cursor.
053         */
054        public static List<CXCursor> getDirectChildren(CXCursor cursor) {
055                List<CXCursor> children = new ArrayList<>();
056                ClangBinding.visitChildren(cursor, (childCursor, parent) -> {
057                        children.add(childCursor);
058                        return CXChildVisitResult.CXChildVisit_Continue;
059                });
060                return children;
061        }
062
063        /**
064         * Returns all children of the given cursor (recursive) that have the given
065         * {@link CXCursorKind}.
066         */
067        public static List<CXCursor> findChildrenWithKindRecursively(CXCursor cursor, CXCursorKind cursorKind) {
068                List<CXCursor> matchingCursors = new ArrayList<>();
069                ClangBinding.visitChildren(cursor, (childCursor, parent) -> {
070                        if (childCursor.getKind() == cursorKind) {
071                                matchingCursors.add(childCursor);
072                        }
073                        return CXChildVisitResult.CXChildVisit_Recurse;
074                });
075                return matchingCursors;
076        }
077
078        /**
079         * Prints the diagnostics information ("errors and warnings" in Clang) generated
080         * during construction of the given translationUnit to the given
081         * {@link PrintStream}.
082         * 
083         * Don't delete this method even if it is unused since we need it for debugging
084         * Clang.
085         */
086        @SuppressWarnings("unused")
087        public static void debugPrintDiagnostics(SWIGTYPE_p_CXTranslationUnitImpl translationUnit, PrintStream out) {
088                long numDiagnostics = Clang.clang_getNumDiagnostics(translationUnit);
089                for (long i = 0; i < numDiagnostics; i++) {
090                        SWIGTYPE_p_void diag = Clang.clang_getDiagnostic(translationUnit, i);
091                        out.println(Clang.clang_getCString(Clang.clang_formatDiagnostic(diag, DIAGNOSTICS_FLAGS)));
092                }
093        }
094
095        /**
096         * Prints the AST of the given translation unit to System.out.
097         * 
098         * Don't delete this method even if it is unused since we need it for debugging
099         * Clang.
100         */
101        @SuppressWarnings("unused")
102        public static void debugPrintTranslationUnitAST(SWIGTYPE_p_CXTranslationUnitImpl translationUnit) {
103                CXCursor topLevelCursor = Clang.clang_getTranslationUnitCursor(translationUnit);
104                ClangBinding.visitChildren(topLevelCursor, new PrintVisitor(new CursorPrinter(translationUnit), System.out));
105        }
106
107        /**
108         * Creates a {@link CheckTextRegionLocation} for the extent of the given clang
109         * cursor, if the given cursor is actually from the given file (and not some
110         * included header).
111         */
112        public static Optional<CheckTextRegionLocation> createTextRegionLocation(CXCursor cursor) {
113                CXSourceRange findingLocationExtent = Clang.clang_getCursorExtent(cursor);
114                CXSourceLocation findingLocationStart = Clang.clang_getRangeStart(findingLocationExtent);
115                if (Clang.clang_Location_isFromMainFile(findingLocationStart) == 0) {
116                        return Optional.empty();
117                }
118                ClangSpellingLocationProperties start = ClangBinding.getSpellingLocationProperties(findingLocationStart);
119                ClangSpellingLocationProperties end = ClangBinding
120                                .getSpellingLocationProperties(Clang.clang_getRangeEnd(findingLocationExtent));
121                return Optional.of(new CheckTextRegionLocation(start.getFile(), start.getOffset(), end.getOffset(),
122                                start.getLine(), end.getLine()));
123        }
124
125        /**
126         * Returns the code from the start to the end of the given {@link CXCursor}.
127         * This assumes that the cursors' TranslationUnit was created with the given
128         * code (same
129         * {@link eu.cqse.check.framework.core.phase.ECodeViewOption.ETextViewOption}
130         * text representation). Otherwise offsets will be wrong.
131         *
132         * Line breaks have been removed from the returned string (to simplify using it
133         * in user-visible messages).
134         *
135         * Just getting the name of the current cursor works for simple cases (with
136         * <code>Clang.clang_getCString(Clang.clang_getCursorSpelling(cursor))</code>),
137         * but if child is a more complex expression, this returns the empty string.
138         * Therefore, we get the text from the entire region of the cursor here.
139         */
140        public static Optional<String> getCompleteNodeText(CXCursor cursor, String mainFileContent) {
141                return createTextRegionLocation(cursor).map(textLocation -> {
142                        String textAtLocation = mainFileContent.substring(textLocation.rawStartOffset, textLocation.rawEndOffset);
143                        return StringUtils.replaceLineBreaks(textAtLocation, "");
144                });
145
146        }
147
148        /**
149         * Resolves the given type to its initial type definition.
150         *
151         * For example the type <code>X</code> in
152         * <code>typedef int A; typedef A B; typedef B X;</code> will be resolved to
153         * <code>int</code>.
154         */
155        public static CXType resolveTypedefs(CXType type) {
156                CXType resolvedType = type;
157                while (resolvedType.getKind() == CXTypeKind.CXType_Typedef) {
158                        resolvedType = Clang.clang_getTypedefDeclUnderlyingType(Clang.clang_getTypeDeclaration(resolvedType));
159                }
160                return resolvedType;
161        }
162
163        /**
164         * This is a hack to get the text of a binary operator. It is necessary since
165         * our clang JNI binding does not export the Clang BinaryOperator or
166         * CompoundAssignOperator functions (e.g., clang::BinaryOperator::getOpcode).
167         * 
168         * This function throws an assertion error if called with a cursor that does not
169         * have kind CXCursor_BinaryOperator. Otherwise, it tries to returns the
170         * operator as String (e.g., "=" or "+"). The operator is normalized
171         * (leading/trailing whitespace is removed, linebreaks replaced by " ").
172         * 
173         * This won't work on cursors that were created by macros (we try to detect such
174         * situations and return an Optional.empty).
175         *
176         * @param operatorCursor
177         *            Cursor of kind {@link CXCursorKind#CXCursor_BinaryOperator} or
178         *            {@link CXCursorKind#CXCursor_CompoundAssignOperator}
179         * @param completeFileText
180         *            The text of the file used to create the current translation unit.
181         *            The operator text is extracted from this string based on the
182         *            offsets reported by clang.
183         * @return An Optional containing the text of the operator (e.g. "=" or "+") or
184         *         Optional.empty if the operator text can't be determined (e.g., macro
185         *         problems)
186         */
187        public static Optional<String> getOperatorText(CXCursor operatorCursor, String completeFileText) {
188                CCSMAssert.isTrue(isBinaryOperatorOrCompoundAssignment(operatorCursor),
189                                () -> "Cursor must be of kind CXCursor_BinaryOperator or CXCursor_CompoundAssignOperator to use this "
190                                                + "method. It was " + Clang.clang_getCursorKind(operatorCursor).toString() + " instead.");
191                List<CXCursor> children = getDirectChildren(operatorCursor);
192                CXSourceLocation startLocation = Clang.clang_getRangeEnd(Clang.clang_getCursorExtent(children.get(0)));
193                CXSourceLocation endLocation = Clang.clang_getRangeStart(Clang.clang_getCursorExtent(children.get(1)));
194                if (Clang.clang_Location_isFromMainFile(startLocation) == 0
195                                || Clang.clang_Location_isFromMainFile(endLocation) == 0) {
196                        return Optional.empty();
197                }
198                ClangSpellingLocationProperties start = ClangBinding.getSpellingLocationProperties(startLocation);
199                ClangSpellingLocationProperties end = ClangBinding.getSpellingLocationProperties(endLocation);
200                if (start.getOffset() >= end.getOffset()) {
201                        // this was likely caused by a macro expansion
202                        return Optional.empty();
203                }
204
205                String operatorText = completeFileText.substring(start.getOffset(), end.getOffset());
206                operatorText = StringUtils.replaceLineBreaks(operatorText, " ").trim();
207                return Optional.of(operatorText);
208        }
209
210        /**
211         * Returns the cursor for a given token (if any found). This is the first cursor
212         * found in the tree with the exact same start/end offset as the given token.
213         * Depending on the structure of the AST and the offsets, this is not
214         * necessarily a leaf cursor.
215         */
216        public static Optional<CXCursor> getCursorForToken(IToken token, CXCursor rootCursor) {
217                return getCursorForStartEndOffset(token.getOffset(), token.getEndOffset() + 1, rootCursor);
218        }
219
220        /**
221         * Returns the cursor for a given start/end offset (if any found). This is the
222         * first cursor found in the tree with the exact start/end offset. Depending on
223         * the structure of the AST and the offsets, this is not necessarily a leaf
224         * cursor.
225         */
226        public static Optional<CXCursor> getCursorForStartEndOffset(int inclusiveStartOffset, int exclusiveEndOffset,
227                        CXCursor rootCursor) {
228                List<CXCursor> matches = new ArrayList<>();
229                ClangBinding.visitChildren(rootCursor, (childCursor, parent) -> {
230                        CXSourceRange extent = Clang.clang_getCursorExtent(childCursor);
231                        try {
232                                int cursorStartOffset = ClangBinding.getSpellingLocationProperties(Clang.clang_getRangeStart(extent))
233                                                .getOffset();
234                                int cursorEndOffset = ClangBinding.getSpellingLocationProperties(Clang.clang_getRangeEnd(extent))
235                                                .getOffset();
236
237                                if (cursorStartOffset > inclusiveStartOffset) {
238                                        return CXChildVisitResult.CXChildVisit_Break;
239                                }
240                                if (cursorEndOffset < exclusiveEndOffset) {
241                                        return CXChildVisitResult.CXChildVisit_Continue;
242                                }
243                                if (cursorStartOffset < inclusiveStartOffset || cursorEndOffset > exclusiveEndOffset) {
244                                        return CXChildVisitResult.CXChildVisit_Recurse;
245                                }
246
247                                matches.add(childCursor);
248                                return CXChildVisitResult.CXChildVisit_Break;
249                        } catch (RuntimeException e) {
250                                // this happens when we recurse into headers
251                                return CXChildVisitResult.CXChildVisit_Break;
252                        }
253                });
254                return matches.stream().findAny();
255        }
256
257        /**
258         * Returns true if the this is a {@link CXCursor} for a
259         * {@link CXCursorKind#CXCursor_BinaryOperator} (e.g. +, -, = etc.) or a
260         * {@link CXCursorKind#CXCursor_CompoundAssignOperator} (e.g. +=, -= etc.).
261         * Since both have a left hand- and right hand side with the operator in the
262         * middle, these operands are handled together for convenience.
263         */
264        public static boolean isBinaryOperatorOrCompoundAssignment(CXCursor cursor) {
265                CXCursorKind cursorKind = Clang.clang_getCursorKind(cursor);
266                return cursorKind == CXCursorKind.CXCursor_BinaryOperator
267                                || cursorKind == CXCursorKind.CXCursor_CompoundAssignOperator;
268        }
269}