1 package jdiff; 2 3 import java.io.*; 4 import java.util.*; 5 6 /** A class to compare vectors of objects. The result of comparison 7 is a list of <code>change</code> objects which form an 8 edit script. The objects compared are traditionally lines 9 of text from two files. Comparison options such as "ignore 10 whitespace" are implemented by modifying the <code>equals</code> 11 and <code>hashcode</code> methods for the objects compared. 12 <p> 13 The basic algorithm is described in: </br> 14 "An O(ND) Difference Algorithm and its Variations", Eugene Myers, 15 Algorithmica Vol. 1 No. 2, 1986, p 251. 16 <p> 17 This class outputs different results from GNU diff 1.15 on some 18 inputs. Our results are actually better (smaller change list, smaller 19 total size of changes), but it would be nice to know why. Perhaps 20 there is a memory overwrite bug in GNU diff 1.15. 21 22 @author Stuart D. Gathman, translated from GNU diff 1.15 23 Copyright (C) 2000 Business Management Systems, Inc. 24 <p> 25 This program is free software; you can redistribute it and/or modify 26 it under the terms of the GNU General Public License as published by 27 the Free Software Foundation; either version 1, or (at your option) 28 any later version. 29 <p> 30 This program is distributed in the hope that it will be useful, 31 but WITHOUT ANY WARRANTY; without even the implied warranty of 32 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 33 GNU General Public License for more details. 34 <p> 35 You should have received a copy of the <a href=COPYING.txt> 36 GNU General Public License</a> 37 along with this program; if not, write to the Free Software 38 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 39 40 */ 41 42 public class DiffMyers 43 { 44 45 /** Prepare to find differences between two arrays. Each element of 46 the arrays is translated to an "equivalence number" based on 47 the result of <code>equals</code>. The original Object arrays 48 are no longer needed for computing the differences. They will 49 be needed again later to print the results of the comparison as 50 an edit script, if desired. 51 */ DiffMyers(Object[] a,Object[] b)52 public DiffMyers(Object[] a,Object[] b) 53 { 54 Hashtable h = new Hashtable(a.length + b.length); 55 filevec[0] = new file_data(a,h); 56 filevec[1] = new file_data(b,h); 57 } 58 59 /** 1 more than the maximum equivalence value used for this or its 60 sibling file. */ 61 private int equiv_max = 1; 62 63 /** When set to true, the comparison uses a heuristic to speed it up. 64 With this heuristic, for files with a constant small density 65 of changes, the algorithm is linear in the file size. */ 66 public boolean heuristic = false; 67 68 /** When set to true, the algorithm returns a guarranteed minimal 69 set of changes. This makes things slower, sometimes much slower. */ 70 public boolean no_discards = false; 71 72 private int[] xvec, yvec; /* Vectors being compared. */ 73 private int[] fdiag; /* Vector, indexed by diagonal, containing 74 the X coordinate of the point furthest 75 along the given diagonal in the forward 76 search of the edit matrix. */ 77 private int[] bdiag; /* Vector, indexed by diagonal, containing 78 the X coordinate of the point furthest 79 along the given diagonal in the backward 80 search of the edit matrix. */ 81 private int fdiagoff, bdiagoff; 82 private final file_data[] filevec = new file_data[2]; 83 private int cost; 84 85 /** Find the midpoint of the shortest edit script for a specified 86 portion of the two files. 87 88 We scan from the beginnings of the files, and simultaneously from the ends, 89 doing a breadth-first search through the space of edit-sequence. 90 When the two searches meet, we have found the midpoint of the shortest 91 edit sequence. 92 93 The value returned is the number of the diagonal on which the midpoint lies. 94 The diagonal number equals the number of inserted lines minus the number 95 of deleted lines (counting only lines before the midpoint). 96 The edit cost is stored into COST; this is the total number of 97 lines inserted or deleted (counting only lines before the midpoint). 98 99 This function assumes that the first lines of the specified portions 100 of the two files do not match, and likewise that the last lines do not 101 match. The caller must trim matching lines from the beginning and end 102 of the portions it is going to specify. 103 104 Note that if we return the "wrong" diagonal value, or if 105 the value of bdiag at that diagonal is "wrong", 106 the worst this can do is cause suboptimal diff output. 107 It cannot cause incorrect diff output. */ 108 diag(int xoff, int xlim, int yoff, int ylim)109 private int diag (int xoff, int xlim, int yoff, int ylim) 110 { 111 final int[] fd = fdiag; // Give the compiler a chance. 112 final int[] bd = bdiag; // Additional help for the compiler. 113 final int[] xv = xvec; // Still more help for the compiler. 114 final int[] yv = yvec; // And more and more . . . 115 final int dmin = xoff - ylim; // Minimum valid diagonal. 116 final int dmax = xlim - yoff; // Maximum valid diagonal. 117 final int fmid = xoff - yoff; // Center diagonal of top-down search. 118 final int bmid = xlim - ylim; // Center diagonal of bottom-up search. 119 int fmin = fmid, fmax = fmid; // Limits of top-down search. 120 int bmin = bmid, bmax = bmid; // Limits of bottom-up search. 121 /* True if southeast corner is on an odd 122 diagonal with respect to the northwest. */ 123 final boolean odd = (fmid - bmid & 1) != 0; 124 125 fd[fdiagoff + fmid] = xoff; 126 bd[bdiagoff + bmid] = xlim; 127 128 for (int c = 1;; ++c) 129 { 130 int d; /* Active diagonal. */ 131 boolean big_snake = false; 132 133 /* Extend the top-down search by an edit step in each diagonal. */ 134 if (fmin > dmin) 135 fd[fdiagoff + --fmin - 1] = -1; 136 else 137 ++fmin; 138 if (fmax < dmax) 139 fd[fdiagoff + ++fmax + 1] = -1; 140 else 141 --fmax; 142 for (d = fmax; d >= fmin; d -= 2) 143 { 144 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1]; 145 146 if (tlo >= thi) 147 x = tlo + 1; 148 else 149 x = thi; 150 oldx = x; 151 y = x - d; 152 while (x < xlim && y < ylim && xv[x] == yv[y]) { 153 ++x; ++y; 154 } 155 if (x - oldx > 20) 156 big_snake = true; 157 fd[fdiagoff + d] = x; 158 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) 159 { 160 cost = 2 * c - 1; 161 return d; 162 } 163 } 164 165 /* Similar extend the bottom-up search. */ 166 if (bmin > dmin) 167 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE; 168 else 169 ++bmin; 170 if (bmax < dmax) 171 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE; 172 else 173 --bmax; 174 for (d = bmax; d >= bmin; d -= 2) 175 { 176 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1]; 177 178 if (tlo < thi) 179 x = tlo; 180 else 181 x = thi - 1; 182 oldx = x; 183 y = x - d; 184 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) { 185 --x; --y; 186 } 187 if (oldx - x > 20) 188 big_snake = true; 189 bd[bdiagoff + d] = x; 190 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) 191 { 192 cost = 2 * c; 193 return d; 194 } 195 } 196 197 /* Heuristic: check occasionally for a diagonal that has made 198 lots of progress compared with the edit distance. 199 If we have any such, find the one that has made the most 200 progress and return it as if it had succeeded. 201 202 With this heuristic, for files with a constant small density 203 of changes, the algorithm is linear in the file size. */ 204 205 if (c > 200 && big_snake && heuristic) 206 { 207 int best = 0; 208 int bestpos = -1; 209 210 for (d = fmax; d >= fmin; d -= 2) 211 { 212 int dd = d - fmid; 213 if ((fd[fdiagoff + d] - xoff)*2 - dd > 12 * (c + (dd > 0 ? dd : -dd))) 214 { 215 if (fd[fdiagoff + d] * 2 - dd > best 216 && fd[fdiagoff + d] - xoff > 20 217 && fd[fdiagoff + d] - d - yoff > 20) 218 { 219 int k; 220 int x = fd[fdiagoff + d]; 221 222 /* We have a good enough best diagonal; 223 now insist that it end with a significant snake. */ 224 for (k = 1; k <= 20; k++) 225 if (xvec[x - k] != yvec[x - d - k]) 226 break; 227 228 if (k == 21) 229 { 230 best = fd[fdiagoff + d] * 2 - dd; 231 bestpos = d; 232 } 233 } 234 } 235 } 236 if (best > 0) 237 { 238 cost = 2 * c - 1; 239 return bestpos; 240 } 241 242 best = 0; 243 for (d = bmax; d >= bmin; d -= 2) 244 { 245 int dd = d - bmid; 246 if ((xlim - bd[bdiagoff + d])*2 + dd > 12 * (c + (dd > 0 ? dd : -dd))) 247 { 248 if ((xlim - bd[bdiagoff + d]) * 2 + dd > best 249 && xlim - bd[bdiagoff + d] > 20 250 && ylim - (bd[bdiagoff + d] - d) > 20) 251 { 252 /* We have a good enough best diagonal; 253 now insist that it end with a significant snake. */ 254 int k; 255 int x = bd[bdiagoff + d]; 256 257 for (k = 0; k < 20; k++) 258 if (xvec[x + k] != yvec[x - d + k]) 259 break; 260 if (k == 20) 261 { 262 best = (xlim - bd[bdiagoff + d]) * 2 + dd; 263 bestpos = d; 264 } 265 } 266 } 267 } 268 if (best > 0) 269 { 270 cost = 2 * c - 1; 271 return bestpos; 272 } 273 } 274 } 275 } 276 277 /** Compare in detail contiguous subsequences of the two files 278 which are known, as a whole, to match each other. 279 280 The results are recorded in the vectors filevec[N].changed_flag, by 281 storing a 1 in the element for each line that is an insertion or deletion. 282 283 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. 284 285 Note that XLIM, YLIM are exclusive bounds. 286 All line numbers are origin-0 and discarded lines are not counted. */ 287 compareseq(int xoff, int xlim, int yoff, int ylim)288 private void compareseq (int xoff, int xlim, int yoff, int ylim) { 289 /* Slide down the bottom initial diagonal. */ 290 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) { 291 ++xoff; ++yoff; 292 } 293 /* Slide up the top initial diagonal. */ 294 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) { 295 --xlim; --ylim; 296 } 297 298 /* Handle simple cases. */ 299 if (xoff == xlim) 300 while (yoff < ylim) 301 filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true; 302 else if (yoff == ylim) 303 while (xoff < xlim) 304 filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true; 305 else 306 { 307 /* Find a point of correspondence in the middle of the files. */ 308 309 int d = diag (xoff, xlim, yoff, ylim); 310 int c = cost; 311 int f = fdiag[fdiagoff + d]; 312 int b = bdiag[bdiagoff + d]; 313 314 if (c == 1) 315 { 316 /* This should be impossible, because it implies that 317 one of the two subsequences is empty, 318 and that case was handled above without calling `diag'. 319 Let's verify that this is true. */ 320 throw new IllegalArgumentException("Empty subsequence"); 321 } 322 else 323 { 324 /* Use that point to split this problem into two subproblems. */ 325 compareseq (xoff, b, yoff, b - d); 326 /* This used to use f instead of b, 327 but that is incorrect! 328 It is not necessarily the case that diagonal d 329 has a snake from b to f. */ 330 compareseq (b, xlim, b - d, ylim); 331 } 332 } 333 } 334 335 /** Discard lines from one file that have no matches in the other file. 336 */ 337 discard_confusing_lines()338 private void discard_confusing_lines() { 339 filevec[0].discard_confusing_lines(filevec[1]); 340 filevec[1].discard_confusing_lines(filevec[0]); 341 } 342 343 private boolean inhibit = false; 344 345 /** Adjust inserts/deletes of blank lines to join changes 346 as much as possible. 347 */ 348 shift_boundaries()349 private void shift_boundaries() { 350 if (inhibit) 351 return; 352 filevec[0].shift_boundaries(filevec[1]); 353 filevec[1].shift_boundaries(filevec[0]); 354 } 355 356 /** Scan the tables of which lines are inserted and deleted, 357 producing an edit script in reverse order. */ 358 build_reverse_script()359 private change build_reverse_script() { 360 change script = null; 361 final boolean[] changed0 = filevec[0].changed_flag; 362 final boolean[] changed1 = filevec[1].changed_flag; 363 final int len0 = filevec[0].buffered_lines; 364 final int len1 = filevec[1].buffered_lines; 365 366 /* Note that changedN[len0] does exist, and contains 0. */ 367 368 int i0 = 0, i1 = 0; 369 370 while (i0 < len0 || i1 < len1) 371 { 372 if (changed0[1+i0] || changed1[1+i1]) 373 { 374 int line0 = i0, line1 = i1; 375 376 /* Find # lines changed here in each file. */ 377 while (changed0[1+i0]) ++i0; 378 while (changed1[1+i1]) ++i1; 379 380 /* Record this change. */ 381 script = new change(line0, line1, i0 - line0, i1 - line1, script); 382 } 383 384 /* We have reached lines in the two files that match each other. */ 385 i0++; i1++; 386 } 387 388 return script; 389 } 390 391 /** Scan the tables of which lines are inserted and deleted, 392 producing an edit script in forward order. */ 393 build_script()394 private change build_script() { 395 change script = null; 396 final boolean[] changed0 = filevec[0].changed_flag; 397 final boolean[] changed1 = filevec[1].changed_flag; 398 final int len0 = filevec[0].buffered_lines; 399 final int len1 = filevec[1].buffered_lines; 400 int i0 = len0, i1 = len1; 401 402 /* Note that changedN[-1] does exist, and contains 0. */ 403 404 while (i0 >= 0 || i1 >= 0) 405 { 406 if (changed0[i0] || changed1[i1]) 407 { 408 int line0 = i0, line1 = i1; 409 410 /* Find # lines changed here in each file. */ 411 while (changed0[i0]) --i0; 412 while (changed1[i1]) --i1; 413 414 /* Record this change. */ 415 script = new change(i0, i1, line0 - i0, line1 - i1, script); 416 } 417 418 /* We have reached lines in the two files that match each other. */ 419 i0--; i1--; 420 } 421 422 return script; 423 } 424 425 /* Report the differences of two files. DEPTH is the current directory 426 depth. */ diff_2(final boolean reverse)427 public change diff_2(final boolean reverse) { 428 429 /* Some lines are obviously insertions or deletions 430 because they don't match anything. Detect them now, 431 and avoid even thinking about them in the main comparison algorithm. */ 432 433 discard_confusing_lines (); 434 435 /* Now do the main comparison algorithm, considering just the 436 undiscarded lines. */ 437 438 xvec = filevec[0].undiscarded; 439 yvec = filevec[1].undiscarded; 440 441 int diags = 442 filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3; 443 fdiag = new int[diags]; 444 fdiagoff = filevec[1].nondiscarded_lines + 1; 445 bdiag = new int[diags]; 446 bdiagoff = filevec[1].nondiscarded_lines + 1; 447 448 compareseq (0, filevec[0].nondiscarded_lines, 449 0, filevec[1].nondiscarded_lines); 450 fdiag = null; 451 bdiag = null; 452 453 /* Modify the results slightly to make them prettier 454 in cases where that can validly be done. */ 455 456 shift_boundaries (); 457 458 /* Get the results of comparison in the form of a chain 459 of `struct change's -- an edit script. */ 460 461 if (reverse) 462 return build_reverse_script(); 463 else 464 return build_script(); 465 } 466 467 /** The result of comparison is an "edit script": a chain of change objects. 468 Each change represents one place where some lines are deleted 469 and some are inserted. 470 471 LINE0 and LINE1 are the first affected lines in the two files (origin 0). 472 DELETED is the number of lines deleted here from file 0. 473 INSERTED is the number of lines inserted here in file 1. 474 475 If DELETED is 0 then LINE0 is the number of the line before 476 which the insertion was done; vice versa for INSERTED and LINE1. */ 477 478 public static class change { 479 /** Previous or next edit command. */ 480 public change link; 481 /** # lines of file 1 changed here. */ 482 public int inserted; 483 /** # lines of file 0 changed here. */ 484 public int deleted; 485 /** Line number of 1st deleted line. */ 486 public final int line0; 487 /** Line number of 1st inserted line. */ 488 public final int line1; 489 490 /** Cons an additional entry onto the front of an edit script OLD. 491 LINE0 and LINE1 are the first affected lines in the two files (origin 0). 492 DELETED is the number of lines deleted here from file 0. 493 INSERTED is the number of lines inserted here in file 1. 494 495 If DELETED is 0 then LINE0 is the number of the line before 496 which the insertion was done; vice versa for INSERTED and LINE1. */ change(int line0, int line1, int deleted, int inserted, change old)497 change(int line0, int line1, int deleted, int inserted, change old) { 498 this.line0 = line0; 499 this.line1 = line1; 500 this.inserted = inserted; 501 this.deleted = deleted; 502 this.link = old; 503 //System.err.println(line0+","+line1+","+inserted+","+deleted); 504 } 505 } 506 507 /** Data on one input file being compared. 508 */ 509 510 class file_data { 511 512 /** Allocate changed array for the results of comparison. */ clear()513 void clear() { 514 /* Allocate a flag for each line of each file, saying whether that line 515 is an insertion or deletion. 516 Allocate an extra element, always zero, at each end of each vector. 517 */ 518 changed_flag = new boolean[buffered_lines + 2]; 519 } 520 521 /** Return equiv_count[I] as the number of lines in this file 522 that fall in equivalence class I. 523 @return the array of equivalence class counts. 524 */ equivCount()525 int[] equivCount() { 526 int[] equiv_count = new int[equiv_max]; 527 for (int i = 0; i < buffered_lines; ++i) 528 ++equiv_count[equivs[i]]; 529 return equiv_count; 530 } 531 532 /** Discard lines that have no matches in another file. 533 534 A line which is discarded will not be considered by the actual 535 comparison algorithm; it will be as if that line were not in the file. 536 The file's `realindexes' table maps virtual line numbers 537 (which don't count the discarded lines) into real line numbers; 538 this is how the actual comparison algorithm produces results 539 that are comprehensible when the discarded lines are counted. 540 <p> 541 When we discard a line, we also mark it as a deletion or insertion 542 so that it will be printed in the output. 543 @param f the other file 544 */ discard_confusing_lines(file_data f)545 void discard_confusing_lines(file_data f) { 546 clear(); 547 /* Set up table of which lines are going to be discarded. */ 548 final byte[] discarded = discardable(f.equivCount()); 549 550 /* Don't really discard the provisional lines except when they occur 551 in a run of discardables, with nonprovisionals at the beginning 552 and end. */ 553 filterDiscards(discarded); 554 555 /* Actually discard the lines. */ 556 discard(discarded); 557 } 558 559 /** Mark to be discarded each line that matches no line of another file. 560 If a line matches many lines, mark it as provisionally discardable. 561 @see equivCount() 562 @param counts The count of each equivalence number for the other file. 563 @return 0=nondiscardable, 1=discardable or 2=provisionally discardable 564 for each line 565 */ 566 discardable(final int[] counts)567 private byte[] discardable(final int[] counts) { 568 final int end = buffered_lines; 569 final byte[] discards = new byte[end]; 570 final int[] equivs = this.equivs; 571 int many = 5; 572 int tem = end / 64; 573 574 /* Multiply MANY by approximate square root of number of lines. 575 That is the threshold for provisionally discardable lines. */ 576 while ((tem = tem >> 2) > 0) 577 many *= 2; 578 579 for (int i = 0; i < end; i++) 580 { 581 int nmatch; 582 if (equivs[i] == 0) 583 continue; 584 nmatch = counts[equivs[i]]; 585 if (nmatch == 0) 586 discards[i] = 1; 587 else if (nmatch > many) 588 discards[i] = 2; 589 } 590 return discards; 591 } 592 593 /** Don't really discard the provisional lines except when they occur 594 in a run of discardables, with nonprovisionals at the beginning 595 and end. */ 596 filterDiscards(final byte[] discards)597 private void filterDiscards(final byte[] discards) { 598 final int end = buffered_lines; 599 600 for (int i = 0; i < end; i++) 601 { 602 /* Cancel provisional discards not in middle of run of discards. */ 603 if (discards[i] == 2) 604 discards[i] = 0; 605 else if (discards[i] != 0) 606 { 607 /* We have found a nonprovisional discard. */ 608 int j; 609 int length; 610 int provisional = 0; 611 612 /* Find end of this run of discardable lines. 613 Count how many are provisionally discardable. */ 614 for (j = i; j < end; j++) 615 { 616 if (discards[j] == 0) 617 break; 618 if (discards[j] == 2) 619 ++provisional; 620 } 621 622 /* Cancel provisional discards at end, and shrink the run. */ 623 while (j > i && discards[j - 1] == 2) { 624 discards[--j] = 0; --provisional; 625 } 626 627 /* Now we have the length of a run of discardable lines 628 whose first and last are not provisional. */ 629 length = j - i; 630 631 /* If 1/4 of the lines in the run are provisional, 632 cancel discarding of all provisional lines in the run. */ 633 if (provisional * 4 > length) 634 { 635 while (j > i) 636 if (discards[--j] == 2) 637 discards[j] = 0; 638 } 639 else 640 { 641 int consec; 642 int minimum = 1; 643 int tem = length / 4; 644 645 /* MINIMUM is approximate square root of LENGTH/4. 646 A subrun of two or more provisionals can stand 647 when LENGTH is at least 16. 648 A subrun of 4 or more can stand when LENGTH >= 64. */ 649 while ((tem = tem >> 2) > 0) 650 minimum *= 2; 651 minimum++; 652 653 /* Cancel any subrun of MINIMUM or more provisionals 654 within the larger run. */ 655 for (j = 0, consec = 0; j < length; j++) 656 if (discards[i + j] != 2) 657 consec = 0; 658 else if (minimum == ++consec) 659 /* Back up to start of subrun, to cancel it all. */ 660 j -= consec; 661 else if (minimum < consec) 662 discards[i + j] = 0; 663 664 /* Scan from beginning of run 665 until we find 3 or more nonprovisionals in a row 666 or until the first nonprovisional at least 8 lines in. 667 Until that point, cancel any provisionals. */ 668 for (j = 0, consec = 0; j < length; j++) 669 { 670 if (j >= 8 && discards[i + j] == 1) 671 break; 672 if (discards[i + j] == 2) { 673 consec = 0; discards[i + j] = 0; 674 } 675 else if (discards[i + j] == 0) 676 consec = 0; 677 else 678 consec++; 679 if (consec == 3) 680 break; 681 } 682 683 /* I advances to the last line of the run. */ 684 i += length - 1; 685 686 /* Same thing, from end. */ 687 for (j = 0, consec = 0; j < length; j++) 688 { 689 if (j >= 8 && discards[i - j] == 1) 690 break; 691 if (discards[i - j] == 2) { 692 consec = 0; discards[i - j] = 0; 693 } 694 else if (discards[i - j] == 0) 695 consec = 0; 696 else 697 consec++; 698 if (consec == 3) 699 break; 700 } 701 } 702 } 703 } 704 } 705 706 /** Actually discard the lines. 707 @param discards flags lines to be discarded 708 */ discard(final byte[] discards)709 private void discard(final byte[] discards) { 710 final int end = buffered_lines; 711 int j = 0; 712 for (int i = 0; i < end; ++i) 713 if (no_discards || discards[i] == 0) 714 { 715 undiscarded[j] = equivs[i]; 716 realindexes[j++] = i; 717 } 718 else 719 changed_flag[1+i] = true; 720 nondiscarded_lines = j; 721 } 722 file_data(Object[] data,Hashtable h)723 file_data(Object[] data,Hashtable h) { 724 buffered_lines = data.length; 725 726 equivs = new int[buffered_lines]; 727 undiscarded = new int[buffered_lines]; 728 realindexes = new int[buffered_lines]; 729 730 for (int i = 0; i < data.length; ++i) { 731 Integer ir = (Integer)h.get(data[i]); 732 if (ir == null) 733 h.put(data[i],new Integer(equivs[i] = equiv_max++)); 734 else 735 equivs[i] = ir.intValue(); 736 } 737 } 738 739 /** Adjust inserts/deletes of blank lines to join changes 740 as much as possible. 741 742 We do something when a run of changed lines include a blank 743 line at one end and have an excluded blank line at the other. 744 We are free to choose which blank line is included. 745 `compareseq' always chooses the one at the beginning, 746 but usually it is cleaner to consider the following blank line 747 to be the "change". The only exception is if the preceding blank line 748 would join this change to other changes. 749 @param f the file being compared against 750 */ 751 shift_boundaries(file_data f)752 void shift_boundaries(file_data f) { 753 final boolean[] changed = changed_flag; 754 final boolean[] other_changed = f.changed_flag; 755 int i = 0; 756 int j = 0; 757 int i_end = buffered_lines; 758 int preceding = -1; 759 int other_preceding = -1; 760 761 for (;;) 762 { 763 int start, end, other_start; 764 765 /* Scan forwards to find beginning of another run of changes. 766 Also keep track of the corresponding point in the other file. */ 767 768 while (i < i_end && !changed[1+i]) 769 { 770 while (other_changed[1+j++]) 771 /* Non-corresponding lines in the other file 772 will count as the preceding batch of changes. */ 773 other_preceding = j; 774 i++; 775 } 776 777 if (i == i_end) 778 break; 779 780 start = i; 781 other_start = j; 782 783 for (;;) 784 { 785 /* Now find the end of this run of changes. */ 786 787 while (i < i_end && changed[1+i]) i++; 788 end = i; 789 790 /* If the first changed line matches the following unchanged one, 791 and this run does not follow right after a previous run, 792 and there are no lines deleted from the other file here, 793 then classify the first changed line as unchanged 794 and the following line as changed in its place. */ 795 796 /* You might ask, how could this run follow right after another? 797 Only because the previous run was shifted here. */ 798 799 if (end != i_end 800 && equivs[start] == equivs[end] 801 && !other_changed[1+j] 802 && end != i_end 803 && !((preceding >= 0 && start == preceding) 804 || (other_preceding >= 0 805 && other_start == other_preceding))) 806 { 807 changed[1+end++] = true; 808 changed[1+start++] = false; 809 ++i; 810 /* Since one line-that-matches is now before this run 811 instead of after, we must advance in the other file 812 to keep in synch. */ 813 ++j; 814 } 815 else 816 break; 817 } 818 819 preceding = i; 820 other_preceding = j; 821 } 822 } 823 824 /** Number of elements (lines) in this file. */ 825 final int buffered_lines; 826 827 /** Vector, indexed by line number, containing an equivalence code for 828 each line. It is this vector that is actually compared with that 829 of another file to generate differences. */ 830 private final int[] equivs; 831 832 /** Vector, like the previous one except that 833 the elements for discarded lines have been squeezed out. */ 834 final int[] undiscarded; 835 836 /** Vector mapping virtual line numbers (not counting discarded lines) 837 to real ones (counting those lines). Both are origin-0. */ 838 final int[] realindexes; 839 840 /** Total number of nondiscarded lines. */ 841 int nondiscarded_lines; 842 843 /** Array, indexed by real origin-1 line number, 844 containing true for a line that is an insertion or a deletion. 845 The results of comparison are stored here. */ 846 boolean[] changed_flag; 847 848 } 849 850 } 851