1 /* 2 * Copyright (c) 2020 Google, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.google.common.truth; 17 18 import static java.lang.Math.max; 19 import static java.lang.Math.min; 20 21 import java.util.ArrayList; 22 import java.util.Collections; 23 import java.util.HashMap; 24 import java.util.List; 25 import java.util.Map; 26 27 /** 28 * A custom implementation of the diff algorithm based on the solution described at 29 * https://en.wikipedia.org/wiki/Longest_common_subsequence_problem 30 * 31 * @author Yun Peng (pcloudy@google.com) 32 */ 33 final class DiffUtils { 34 // A list of unique strings appeared in compared texts. 35 // The index of each string is its incremental Id. 36 private final List<String> stringList = new ArrayList<>(); 37 // A map to record each unique string and its incremental id. 38 private final Map<String, Integer> stringToId = new HashMap<>(); 39 private int[] original; 40 private int[] revised; 41 // lcs[i][j] is the length of the longest common sequence of original[1..i] and revised[1..j]. 42 private int[][] lcs; 43 private final List<Character> unifiedDiffType = new ArrayList<>(); 44 private final List<Integer> unifiedDiffContentId = new ArrayList<>(); 45 private final List<String> reducedUnifiedDiff = new ArrayList<>(); 46 private int offsetHead = 0; 47 private int offsetTail = 0; 48 diff( List<String> originalLines, List<String> revisedLines, int contextSize)49 private List<String> diff( 50 List<String> originalLines, List<String> revisedLines, int contextSize) { 51 reduceEqualLinesFromHeadAndTail(originalLines, revisedLines, contextSize); 52 originalLines = originalLines.subList(offsetHead, originalLines.size() - offsetTail); 53 revisedLines = revisedLines.subList(offsetHead, revisedLines.size() - offsetTail); 54 55 original = new int[originalLines.size() + 1]; 56 revised = new int[revisedLines.size() + 1]; 57 lcs = new int[originalLines.size() + 1][revisedLines.size() + 1]; 58 59 for (int i = 0; i < originalLines.size(); i++) { 60 original[i + 1] = getIdByLine(originalLines.get(i)); 61 } 62 for (int i = 0; i < revisedLines.size(); i++) { 63 revised[i + 1] = getIdByLine(revisedLines.get(i)); 64 } 65 66 for (int i = 1; i < original.length; i++) { 67 for (int j = 1; j < revised.length; j++) { 68 if (original[i] == revised[j]) { 69 lcs[i][j] = lcs[i - 1][j - 1] + 1; 70 } else { 71 lcs[i][j] = max(lcs[i][j - 1], lcs[i - 1][j]); 72 } 73 } 74 } 75 76 calcUnifiedDiff(originalLines.size(), revisedLines.size()); 77 78 calcReducedUnifiedDiff(contextSize); 79 80 return reducedUnifiedDiff; 81 } 82 83 /** Calculate an incremental Id for a given string. */ getIdByLine(String line)84 private Integer getIdByLine(String line) { 85 int newId = stringList.size(); 86 Integer existingId = stringToId.put(line, newId); 87 if (existingId == null) { 88 stringList.add(line); 89 return newId; 90 } else { 91 stringToId.put(line, existingId); 92 return existingId; 93 } 94 } 95 96 /** An optimization to reduce the problem size by removing equal lines from head and tail. */ reduceEqualLinesFromHeadAndTail( List<String> original, List<String> revised, int contextSize)97 private void reduceEqualLinesFromHeadAndTail( 98 List<String> original, List<String> revised, int contextSize) { 99 int head = 0; 100 int maxHead = min(original.size(), revised.size()); 101 while (head < maxHead && original.get(head).equals(revised.get(head))) { 102 head++; 103 } 104 head = max(head - contextSize, 0); 105 offsetHead = head; 106 107 int tail = 0; 108 int maxTail = min(original.size() - head - contextSize, revised.size() - head - contextSize); 109 while (tail < maxTail 110 && original 111 .get(original.size() - 1 - tail) 112 .equals(revised.get(revised.size() - 1 - tail))) { 113 tail++; 114 } 115 tail = max(tail - contextSize, 0); 116 offsetTail = tail; 117 } 118 calcUnifiedDiff(int i, int j)119 private void calcUnifiedDiff(int i, int j) { 120 while (i > 0 || j > 0) { 121 if (i > 0 122 && j > 0 123 && original[i] == revised[j] 124 // Make sure the diff output is identical to the diff command line tool when there are 125 // multiple solutions. 126 && lcs[i - 1][j - 1] + 1 > lcs[i - 1][j] 127 && lcs[i - 1][j - 1] + 1 > lcs[i][j - 1]) { 128 unifiedDiffType.add(' '); 129 unifiedDiffContentId.add(original[i]); 130 i--; 131 j--; 132 } else if (j > 0 && (i == 0 || lcs[i][j - 1] >= lcs[i - 1][j])) { 133 unifiedDiffType.add('+'); 134 unifiedDiffContentId.add(revised[j]); 135 j--; 136 } else if (i > 0 && (j == 0 || lcs[i][j - 1] < lcs[i - 1][j])) { 137 unifiedDiffType.add('-'); 138 unifiedDiffContentId.add(original[i]); 139 i--; 140 } 141 } 142 Collections.reverse(unifiedDiffType); 143 Collections.reverse(unifiedDiffContentId); 144 } 145 146 /** 147 * Generate the unified diff with a given context size 148 * 149 * @param contextSize The context size we should leave at the beginning and end of each block. 150 */ calcReducedUnifiedDiff(int contextSize)151 private void calcReducedUnifiedDiff(int contextSize) { 152 // The index of the next line we're going to process in fullDiff. 153 int next = 0; 154 // The number of lines in original/revised file after the diff lines we've processed. 155 int lineNumOrigin = offsetHead; 156 int lineNumRevised = offsetHead; 157 while (next < unifiedDiffType.size()) { 158 // The start and end index of the current block in fullDiff 159 int start; 160 int end; 161 // The start line number of the block in original/revised file. 162 int startLineOrigin; 163 int startLineRevised; 164 // Find the next diff line that is not an equal line. 165 while (next < unifiedDiffType.size() && unifiedDiffType.get(next).equals(' ')) { 166 next++; 167 lineNumOrigin++; 168 lineNumRevised++; 169 } 170 if (next == unifiedDiffType.size()) { 171 break; 172 } 173 // Calculate the start line index of the current block in fullDiff 174 start = max(0, next - contextSize); 175 176 // Record the start line number in original and revised file of the current block 177 startLineOrigin = lineNumOrigin - (next - start - 1); 178 startLineRevised = lineNumRevised - (next - start - 1); 179 180 // The number of consecutive equal lines in fullDiff, we must find at least 181 // contextSize * 2 + 1 equal lines to identify the end of the block. 182 int equalLines = 0; 183 // Let `end` points to the last non-equal diff line 184 end = next; 185 while (next < unifiedDiffType.size() && equalLines < contextSize * 2 + 1) { 186 if (unifiedDiffType.get(next).equals(' ')) { 187 equalLines++; 188 lineNumOrigin++; 189 lineNumRevised++; 190 } else { 191 equalLines = 0; 192 // Record the latest non-equal diff line 193 end = next; 194 if (unifiedDiffType.get(next).equals('-')) { 195 lineNumOrigin++; 196 } else { 197 // line starts with "+" 198 lineNumRevised++; 199 } 200 } 201 next++; 202 } 203 // Calculate the end line index of the current block in fullDiff 204 end = min(end + contextSize + 1, unifiedDiffType.size()); 205 206 // Calculate the size of the block content in original/revised file 207 int blockSizeOrigin = lineNumOrigin - startLineOrigin - (next - end - 1); 208 int blockSizeRevised = lineNumRevised - startLineRevised - (next - end - 1); 209 210 StringBuilder header = new StringBuilder(); 211 header 212 .append("@@ -") 213 .append(startLineOrigin) 214 .append(",") 215 .append(blockSizeOrigin) 216 .append(" +") 217 .append(startLineRevised) 218 .append(",") 219 .append(blockSizeRevised) 220 .append(" @@"); 221 222 reducedUnifiedDiff.add(header.toString()); 223 for (int i = start; i < end; i++) { 224 reducedUnifiedDiff.add( 225 unifiedDiffType.get(i) + stringList.get(unifiedDiffContentId.get(i))); 226 } 227 } 228 } 229 generateUnifiedDiff( List<String> original, List<String> revised, int contextSize)230 static List<String> generateUnifiedDiff( 231 List<String> original, List<String> revised, int contextSize) { 232 return new DiffUtils().diff(original, revised, contextSize); 233 } 234 } 235