• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2020 Google, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.google.common.truth;
17 
18 import static java.lang.Math.max;
19 import static java.lang.Math.min;
20 
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26 
27 /**
28  * A custom implementation of the diff algorithm based on the solution described at
29  * https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
30  *
31  * @author Yun Peng (pcloudy@google.com)
32  */
33 final class DiffUtils {
34   // A list of unique strings appeared in compared texts.
35   // The index of each string is its incremental Id.
36   private final List<String> stringList = new ArrayList<>();
37   // A map to record each unique string and its incremental id.
38   private final Map<String, Integer> stringToId = new HashMap<>();
39   private int[] original;
40   private int[] revised;
41   // lcs[i][j] is the length of the longest common sequence of original[1..i] and revised[1..j].
42   private int[][] lcs;
43   private final List<Character> unifiedDiffType = new ArrayList<>();
44   private final List<Integer> unifiedDiffContentId = new ArrayList<>();
45   private final List<String> reducedUnifiedDiff = new ArrayList<>();
46   private int offsetHead = 0;
47   private int offsetTail = 0;
48 
diff( List<String> originalLines, List<String> revisedLines, int contextSize)49   private List<String> diff(
50       List<String> originalLines, List<String> revisedLines, int contextSize) {
51     reduceEqualLinesFromHeadAndTail(originalLines, revisedLines, contextSize);
52     originalLines = originalLines.subList(offsetHead, originalLines.size() - offsetTail);
53     revisedLines = revisedLines.subList(offsetHead, revisedLines.size() - offsetTail);
54 
55     original = new int[originalLines.size() + 1];
56     revised = new int[revisedLines.size() + 1];
57     lcs = new int[originalLines.size() + 1][revisedLines.size() + 1];
58 
59     for (int i = 0; i < originalLines.size(); i++) {
60       original[i + 1] = getIdByLine(originalLines.get(i));
61     }
62     for (int i = 0; i < revisedLines.size(); i++) {
63       revised[i + 1] = getIdByLine(revisedLines.get(i));
64     }
65 
66     for (int i = 1; i < original.length; i++) {
67       for (int j = 1; j < revised.length; j++) {
68         if (original[i] == revised[j]) {
69           lcs[i][j] = lcs[i - 1][j - 1] + 1;
70         } else {
71           lcs[i][j] = max(lcs[i][j - 1], lcs[i - 1][j]);
72         }
73       }
74     }
75 
76     calcUnifiedDiff(originalLines.size(), revisedLines.size());
77 
78     calcReducedUnifiedDiff(contextSize);
79 
80     return reducedUnifiedDiff;
81   }
82 
83   /** Calculate an incremental Id for a given string. */
getIdByLine(String line)84   private Integer getIdByLine(String line) {
85     int newId = stringList.size();
86     Integer existingId = stringToId.put(line, newId);
87     if (existingId == null) {
88       stringList.add(line);
89       return newId;
90     } else {
91       stringToId.put(line, existingId);
92       return existingId;
93     }
94   }
95 
96   /** An optimization to reduce the problem size by removing equal lines from head and tail. */
reduceEqualLinesFromHeadAndTail( List<String> original, List<String> revised, int contextSize)97   private void reduceEqualLinesFromHeadAndTail(
98       List<String> original, List<String> revised, int contextSize) {
99     int head = 0;
100     int maxHead = min(original.size(), revised.size());
101     while (head < maxHead && original.get(head).equals(revised.get(head))) {
102       head++;
103     }
104     head = max(head - contextSize, 0);
105     offsetHead = head;
106 
107     int tail = 0;
108     int maxTail = min(original.size() - head - contextSize, revised.size() - head - contextSize);
109     while (tail < maxTail
110         && original
111             .get(original.size() - 1 - tail)
112             .equals(revised.get(revised.size() - 1 - tail))) {
113       tail++;
114     }
115     tail = max(tail - contextSize, 0);
116     offsetTail = tail;
117   }
118 
calcUnifiedDiff(int i, int j)119   private void calcUnifiedDiff(int i, int j) {
120     while (i > 0 || j > 0) {
121       if (i > 0
122           && j > 0
123           && original[i] == revised[j]
124           // Make sure the diff output is identical to the diff command line tool when there are
125           // multiple solutions.
126           && lcs[i - 1][j - 1] + 1 > lcs[i - 1][j]
127           && lcs[i - 1][j - 1] + 1 > lcs[i][j - 1]) {
128         unifiedDiffType.add(' ');
129         unifiedDiffContentId.add(original[i]);
130         i--;
131         j--;
132       } else if (j > 0 && (i == 0 || lcs[i][j - 1] >= lcs[i - 1][j])) {
133         unifiedDiffType.add('+');
134         unifiedDiffContentId.add(revised[j]);
135         j--;
136       } else if (i > 0 && (j == 0 || lcs[i][j - 1] < lcs[i - 1][j])) {
137         unifiedDiffType.add('-');
138         unifiedDiffContentId.add(original[i]);
139         i--;
140       }
141     }
142     Collections.reverse(unifiedDiffType);
143     Collections.reverse(unifiedDiffContentId);
144   }
145 
146   /**
147    * Generate the unified diff with a given context size
148    *
149    * @param contextSize The context size we should leave at the beginning and end of each block.
150    */
calcReducedUnifiedDiff(int contextSize)151   private void calcReducedUnifiedDiff(int contextSize) {
152     // The index of the next line we're going to process in fullDiff.
153     int next = 0;
154     // The number of lines in original/revised file after the diff lines we've processed.
155     int lineNumOrigin = offsetHead;
156     int lineNumRevised = offsetHead;
157     while (next < unifiedDiffType.size()) {
158       // The start and end index of the current block in fullDiff
159       int start;
160       int end;
161       // The start line number of the block in original/revised file.
162       int startLineOrigin;
163       int startLineRevised;
164       // Find the next diff line that is not an equal line.
165       while (next < unifiedDiffType.size() && unifiedDiffType.get(next).equals(' ')) {
166         next++;
167         lineNumOrigin++;
168         lineNumRevised++;
169       }
170       if (next == unifiedDiffType.size()) {
171         break;
172       }
173       // Calculate the start line index of the current block in fullDiff
174       start = max(0, next - contextSize);
175 
176       // Record the start line number in original and revised file of the current block
177       startLineOrigin = lineNumOrigin - (next - start - 1);
178       startLineRevised = lineNumRevised - (next - start - 1);
179 
180       // The number of consecutive equal lines in fullDiff, we must find at least
181       // contextSize * 2 + 1 equal lines to identify the end of the block.
182       int equalLines = 0;
183       // Let `end` points to the last non-equal diff line
184       end = next;
185       while (next < unifiedDiffType.size() && equalLines < contextSize * 2 + 1) {
186         if (unifiedDiffType.get(next).equals(' ')) {
187           equalLines++;
188           lineNumOrigin++;
189           lineNumRevised++;
190         } else {
191           equalLines = 0;
192           // Record the latest non-equal diff line
193           end = next;
194           if (unifiedDiffType.get(next).equals('-')) {
195             lineNumOrigin++;
196           } else {
197             // line starts with "+"
198             lineNumRevised++;
199           }
200         }
201         next++;
202       }
203       // Calculate the end line index of the current block in fullDiff
204       end = min(end + contextSize + 1, unifiedDiffType.size());
205 
206       // Calculate the size of the block content in original/revised file
207       int blockSizeOrigin = lineNumOrigin - startLineOrigin - (next - end - 1);
208       int blockSizeRevised = lineNumRevised - startLineRevised - (next - end - 1);
209 
210       StringBuilder header = new StringBuilder();
211       header
212           .append("@@ -")
213           .append(startLineOrigin)
214           .append(",")
215           .append(blockSizeOrigin)
216           .append(" +")
217           .append(startLineRevised)
218           .append(",")
219           .append(blockSizeRevised)
220           .append(" @@");
221 
222       reducedUnifiedDiff.add(header.toString());
223       for (int i = start; i < end; i++) {
224         reducedUnifiedDiff.add(
225             unifiedDiffType.get(i) + stringList.get(unifiedDiffContentId.get(i)));
226       }
227     }
228   }
229 
generateUnifiedDiff( List<String> original, List<String> revised, int contextSize)230   static List<String> generateUnifiedDiff(
231       List<String> original, List<String> revised, int contextSize) {
232     return new DiffUtils().diff(original, revised, contextSize);
233   }
234 }
235