001/*-----------------------------------------------------------------------+ 002 | com.teamscale.ui 003 | | 004 $Id$ 005 | | 006 | Copyright (c) 2009-2013 CQSE GmbH | 007 +-----------------------------------------------------------------------*/ 008package org.conqat.lib.commons.diff; 009 010import java.util.ArrayList; 011import java.util.List; 012import java.util.regex.Matcher; 013import java.util.regex.Pattern; 014 015import org.conqat.lib.commons.algo.Diff; 016import org.conqat.lib.commons.algo.Diff.Delta; 017import org.conqat.lib.commons.collections.CollectionUtils; 018import org.conqat.lib.commons.collections.Pair; 019 020/** 021 * Base class for the code that produces a {@link DiffDescription} for two 022 * elements. 023 */ 024public abstract class DifferBase<T> { 025 026 /** 027 * Maximal size of the delta computed during diff calculation. If there are 028 * more than this number of changes, a partial diff will be displayed (i.e. 029 * only changes in the top region). The reason to limit this, is that the 030 * used algorithm uses O(D^2) memory for a diff of size D. 031 */ 032 private static final int MAX_DIFF_SIZE = 10000; 033 034 /** 035 * Pattern for extracting fragments. This matches consecutive word 036 * characters, consecutive digits, consecutive whitespace, or a single 037 * character. 038 */ 039 private static final Pattern FRAGMENT_MATCH_PATTERN = Pattern.compile("\\w+|\\d+|\\s+|."); 040 041 /** Text of the left element. */ 042 private String leftText; 043 044 /** Text of the right element. */ 045 private String rightText; 046 047 /** Chunks for the left element. */ 048 private List<TextChunk> leftChunks; 049 050 /** Chunks for the right element. */ 051 private List<TextChunk> rightChunks; 052 053 /** Performs the diff and returns a {@link DiffDescription}. */ 054 public DiffDescription performDiff(T leftElement, T rightElement) { 055 leftText = getElementText(leftElement); 056 rightText = getElementText(rightElement); 057 058 leftChunks = getChunks(leftElement, true); 059 rightChunks = getChunks(rightElement, false); 060 Delta<TextChunk> delta = Diff.computeDelta(leftChunks, rightChunks, MAX_DIFF_SIZE); 061 062 DiffDescription diffDescription = new DiffDescription(getDiffName()); 063 064 convertDiffToDiffDescription(delta, diffDescription); 065 066 return postProcessDiffDescription(diffDescription); 067 } 068 069 /** 070 * Post processing of the diffDescription. 071 */ 072 protected DiffDescription postProcessDiffDescription(DiffDescription diffDescription) { 073 // default does no post processing 074 return diffDescription; 075 } 076 077 /** Get the text of the element. */ 078 protected abstract String getElementText(T element); 079 080 /** 081 * Processes the given diff and inserts the changed regions into the diff 082 * description. 083 */ 084 private void convertDiffToDiffDescription(Delta<TextChunk> delta, DiffDescription diffDescription) { 085 boolean[] chunkRemoved = new boolean[leftChunks.size()]; 086 boolean[] chunkAdded = new boolean[rightChunks.size()]; 087 088 for (int i = 0; i < delta.getSize(); i++) { 089 int position = delta.getPosition(i); 090 if (position < 0) { // chunk was removed 091 chunkRemoved[-position - 1] = true; 092 } else { // chunk was added 093 chunkAdded[position - 1] = true; 094 } 095 } 096 097 int leftChunkPosition = 0; 098 int rightChunkPosition = 0; 099 100 while (leftChunkPosition < chunkRemoved.length || rightChunkPosition < chunkAdded.length) { 101 int removedChunksRegionStart = leftChunkPosition; 102 int addedChunksRegionStart = rightChunkPosition; 103 104 // Moving leftChunkPosition to the end of the removed chunks 105 while (leftChunkPosition < chunkRemoved.length && chunkRemoved[leftChunkPosition]) { 106 leftChunkPosition++; 107 } 108 109 // Moving rightChunkPosition to the end of the added chunks 110 while (rightChunkPosition < chunkAdded.length && chunkAdded[rightChunkPosition]) { 111 rightChunkPosition++; 112 } 113 114 // Check if there where actual changes 115 if (removedChunksRegionStart != leftChunkPosition || addedChunksRegionStart != rightChunkPosition) { 116 insertDiffRegion(diffDescription, removedChunksRegionStart, leftChunkPosition, addedChunksRegionStart, 117 rightChunkPosition); 118 } else { 119 leftChunkPosition++; 120 rightChunkPosition++; 121 } 122 } 123 } 124 125 /** 126 * Inserts a diff region based on unit indexes. Start indexes are inclusive, 127 * end indexes are exclusive. 128 */ 129 private void insertDiffRegion(DiffDescription diffDescription, int currentLeftStart, int currentLeftEnd, 130 int currentRightStart, int currentRightEnd) { 131 132 Pair<Integer, Integer> leftLines = convertToFirstLastLine(currentLeftStart, currentLeftEnd, leftChunks); 133 Pair<Integer, Integer> rightLines = convertToFirstLastLine(currentRightStart, currentRightEnd, rightChunks); 134 diffDescription.addLineRegion(leftLines.getFirst(), leftLines.getSecond(), rightLines.getFirst(), 135 rightLines.getSecond()); 136 137 List<TextChunk> leftFragments = extractFragments(currentLeftStart, currentLeftEnd, leftText, leftChunks); 138 List<TextChunk> rightFragments = extractFragments(currentRightStart, currentRightEnd, rightText, rightChunks); 139 Delta<TextChunk> delta = Diff.computeDelta(leftFragments, rightFragments, MAX_DIFF_SIZE); 140 for (int i = 0; i < delta.getSize(); ++i) { 141 int position = delta.getPosition(i); 142 if (position > 0) { 143 TextChunk fragment = rightFragments.get(position - 1); 144 diffDescription.addRightChange(fragment.startOffset, fragment.endOffset); 145 } else { 146 TextChunk fragment = leftFragments.get(-position - 1); 147 diffDescription.addLeftChange(fragment.startOffset, fragment.endOffset); 148 } 149 } 150 } 151 152 /** 153 * Converts start/end indexes to start/end lines (start inclusive, end 154 * exclusive). Line indices are starting from 1, indices for units from 0. If 155 * units is empty or the endIndex is 0, the method returns new Pair(1,1). 156 */ 157 private static Pair<Integer, Integer> convertToFirstLastLine(int startIndex, int endIndex, 158 List<TextChunk> units) { 159 if (units.isEmpty() || endIndex == 0) { 160 return new Pair<>(1, 1); 161 } 162 163 int firstLine; 164 if (startIndex >= units.size()) { 165 firstLine = units.get(startIndex - 1).endLine; 166 } else { 167 firstLine = units.get(startIndex).startLine; 168 } 169 int lastLine = Math.max(firstLine, units.get(endIndex - 1).endLine); 170 return new Pair<Integer, Integer>(firstLine, lastLine); 171 } 172 173 /** 174 * Extracts fragments for the diff region denoted by start/end indexes in 175 * the chunks. Fragments are used for "diff in diff" to highlight the 176 * actually changed parts. 177 * 178 * @param startIndex 179 * The zero-based index of the chunk in the given units which 180 * marks the start of the region 181 * @param endIndex 182 * The zero-based index of the chunk in the given units which 183 * marks the end of the region (exclusive) 184 * @param text 185 * The text to extract the region from 186 * @param units 187 * The chunks of the text. Used to find out, the substring of the 188 * region inside the given text 189 * 190 * @return The region split into chunks of fragments denoted by the 191 * {@link #FRAGMENT_MATCH_PATTERN} 192 */ 193 private static List<TextChunk> extractFragments(int startIndex, int endIndex, String text, List<TextChunk> units) { 194 if (startIndex >= units.size() || startIndex >= endIndex) { 195 return CollectionUtils.emptyList(); 196 } 197 198 List<TextChunk> fragments = new ArrayList<TextChunk>(); 199 int startOffset = units.get(startIndex).startOffset; 200 int endOffset = Math.min(units.get(endIndex - 1).endOffset, text.length()); 201 202 Matcher matcher = FRAGMENT_MATCH_PATTERN.matcher(text.substring(startOffset, endOffset)); 203 while (matcher.find()) { 204 int fragmentStart = startOffset + matcher.start(); 205 String fragmentText = matcher.group(); 206 207 // ignore fragments with new lines, as they are only filler 208 // white-space 209 if (fragmentText.contains("\n")) { 210 continue; 211 } 212 213 // lines are not used and thus initialized with dummy values 214 fragments.add(new TextChunk(fragmentStart, fragmentStart + fragmentText.length(), 0, 0, fragmentText)); 215 } 216 217 return fragments; 218 } 219 220 /** 221 * Returns the chunks for an element. 222 * 223 * @param isLeft 224 * indicates whether this method is called for the left or right 225 * element. The implementer then may behave differently for both 226 * elements (possibly depending on additional information). 227 */ 228 protected abstract List<TextChunk> getChunks(T element, boolean isLeft); 229 230 /** Returns the name for this diff. */ 231 protected abstract String getDiffName(); 232 233}