1module HTMLDiff 2 3 Match = Struct.new(:start_in_old, :start_in_new, :size) 4 class Match 5 def end_in_old 6 self.start_in_old + self.size 7 end 8 9 def end_in_new 10 self.start_in_new + self.size 11 end 12 end 13 14 Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new) 15 16 class DiffBuilder 17 18 def initialize(old_version, new_version) 19 @old_version, @new_version = old_version, new_version 20 split_inputs_to_words 21 index_new_words 22 end 23 24 def split_inputs_to_words 25 @old_words = explode(@old_version) 26 @new_words = explode(@new_version) 27 end 28 29 def index_new_words 30 @word_indices = Hash.new { |h, word| h[word] = [] } 31 @new_words.each_with_index { |word, i| @word_indices[word] << i } 32 end 33 34 def operations 35 position_in_old = position_in_new = 0 36 operations = [] 37 38 matches = matching_blocks 39 # an empty match at the end forces the loop below to handle the unmatched tails 40 # I'm sure it can be done more gracefully, but not at 23:52 41 matches << Match.new(@old_words.length, @new_words.length, 0) 42 43 matches.each_with_index do |match, i| 44 match_starts_at_current_position_in_old = (position_in_old == match.start_in_old) 45 match_starts_at_current_position_in_new = (position_in_new == match.start_in_new) 46 47 action_upto_match_positions = 48 case [match_starts_at_current_position_in_old, match_starts_at_current_position_in_new] 49 when [false, false] 50 :replace 51 when [true, false] 52 :insert 53 when [false, true] 54 :delete 55 else 56 # this happens if the first few words are same in both versions 57 :none 58 end 59 60 if action_upto_match_positions != :none 61 operation_upto_match_positions = 62 Operation.new(action_upto_match_positions, 63 position_in_old, match.start_in_old, 64 position_in_new, match.start_in_new) 65 operations << operation_upto_match_positions 66 end 67 if match.size != 0 68 match_operation = Operation.new(:equal, 69 match.start_in_old, match.end_in_old, 70 match.start_in_new, match.end_in_new) 71 operations << match_operation 72 end 73 74 position_in_old = match.end_in_old 75 position_in_new = match.end_in_new 76 end 77 78 operations 79 end 80 81 def matching_blocks 82 matching_blocks = [] 83 recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks) 84 matching_blocks 85 end 86 87 def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks) 88 match = find_match(start_in_old, end_in_old, start_in_new, end_in_new) 89 if match 90 if start_in_old < match.start_in_old and start_in_new < match.start_in_new 91 recursively_find_matching_blocks( 92 start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks) 93 end 94 matching_blocks << match 95 if match.end_in_old < end_in_old and match.end_in_new < end_in_new 96 recursively_find_matching_blocks( 97 match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks) 98 end 99 end 100 end 101 102 def find_match(start_in_old, end_in_old, start_in_new, end_in_new) 103 104 best_match_in_old = start_in_old 105 best_match_in_new = start_in_new 106 best_match_size = 0 107 108 match_length_at = Hash.new { |h, index| h[index] = 0 } 109 110 start_in_old.upto(end_in_old - 1) do |index_in_old| 111 112 new_match_length_at = Hash.new { |h, index| h[index] = 0 } 113 114 @word_indices[@old_words[index_in_old]].each do |index_in_new| 115 next if index_in_new < start_in_new 116 break if index_in_new >= end_in_new 117 118 new_match_length = match_length_at[index_in_new - 1] + 1 119 new_match_length_at[index_in_new] = new_match_length 120 121 if new_match_length > best_match_size 122 best_match_in_old = index_in_old - new_match_length + 1 123 best_match_in_new = index_in_new - new_match_length + 1 124 best_match_size = new_match_length 125 end 126 end 127 match_length_at = new_match_length_at 128 end 129 130# best_match_in_old, best_match_in_new, best_match_size = add_matching_words_left( 131# best_match_in_old, best_match_in_new, best_match_size, start_in_old, start_in_new) 132# best_match_in_old, best_match_in_new, match_size = add_matching_words_right( 133# best_match_in_old, best_match_in_new, best_match_size, end_in_old, end_in_new) 134 135 return (best_match_size != 0 ? Match.new(best_match_in_old, best_match_in_new, best_match_size) : nil) 136 end 137 138 def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new) 139 while match_in_old > start_in_old and 140 match_in_new > start_in_new and 141 @old_words[match_in_old - 1] == @new_words[match_in_new - 1] 142 match_in_old -= 1 143 match_in_new -= 1 144 match_size += 1 145 end 146 [match_in_old, match_in_new, match_size] 147 end 148 149 def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new) 150 while match_in_old + match_size < end_in_old and 151 match_in_new + match_size < end_in_new and 152 @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size] 153 match_size += 1 154 end 155 [match_in_old, match_in_new, match_size] 156 end 157 158 def explode(sequence) 159 sequence.is_a?(String) ? sequence.split(//) : sequence 160 end 161 162 end # of class Diff Builder 163 164end 165