• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1module HTMLDiff
2
3  Match = Struct.new(:start_in_old, :start_in_new, :size)
4  class Match
5    def end_in_old
6      self.start_in_old + self.size
7    end
8
9    def end_in_new
10      self.start_in_new + self.size
11    end
12  end
13
14  Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)
15
16  class DiffBuilder
17
18    def initialize(old_version, new_version)
19      @old_version, @new_version = old_version, new_version
20      split_inputs_to_words
21      index_new_words
22    end
23
24    def split_inputs_to_words
25      @old_words = explode(@old_version)
26      @new_words = explode(@new_version)
27    end
28
29    def index_new_words
30      @word_indices = Hash.new { |h, word| h[word] = [] }
31      @new_words.each_with_index { |word, i| @word_indices[word] << i }
32    end
33
34    def operations
35      position_in_old = position_in_new = 0
36      operations = []
37
38      matches = matching_blocks
39      # an empty match at the end forces the loop below to handle the unmatched tails
40      # I'm sure it can be done more gracefully, but not at 23:52
41      matches << Match.new(@old_words.length, @new_words.length, 0)
42
43      matches.each_with_index do |match, i|
44        match_starts_at_current_position_in_old = (position_in_old == match.start_in_old)
45        match_starts_at_current_position_in_new = (position_in_new == match.start_in_new)
46
47        action_upto_match_positions =
48          case [match_starts_at_current_position_in_old, match_starts_at_current_position_in_new]
49          when [false, false]
50            :replace
51          when [true, false]
52            :insert
53          when [false, true]
54            :delete
55          else
56            # this happens if the first few words are same in both versions
57            :none
58          end
59
60        if action_upto_match_positions != :none
61          operation_upto_match_positions =
62              Operation.new(action_upto_match_positions,
63                  position_in_old, match.start_in_old,
64                  position_in_new, match.start_in_new)
65          operations << operation_upto_match_positions
66        end
67        if match.size != 0
68          match_operation = Operation.new(:equal,
69              match.start_in_old, match.end_in_old,
70              match.start_in_new, match.end_in_new)
71          operations << match_operation
72        end
73
74        position_in_old = match.end_in_old
75        position_in_new = match.end_in_new
76      end
77
78      operations
79    end
80
81    def matching_blocks
82      matching_blocks = []
83      recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks)
84      matching_blocks
85    end
86
87    def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
88      match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
89      if match
90        if start_in_old < match.start_in_old and start_in_new < match.start_in_new
91          recursively_find_matching_blocks(
92              start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks)
93        end
94        matching_blocks << match
95        if match.end_in_old < end_in_old and match.end_in_new < end_in_new
96          recursively_find_matching_blocks(
97              match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks)
98        end
99      end
100    end
101
102    def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
103
104      best_match_in_old = start_in_old
105      best_match_in_new = start_in_new
106      best_match_size = 0
107
108      match_length_at = Hash.new { |h, index| h[index] = 0 }
109
110      start_in_old.upto(end_in_old - 1) do |index_in_old|
111
112        new_match_length_at = Hash.new { |h, index| h[index] = 0 }
113
114        @word_indices[@old_words[index_in_old]].each do |index_in_new|
115          next  if index_in_new < start_in_new
116          break if index_in_new >= end_in_new
117
118          new_match_length = match_length_at[index_in_new - 1] + 1
119          new_match_length_at[index_in_new] = new_match_length
120
121          if new_match_length > best_match_size
122            best_match_in_old = index_in_old - new_match_length + 1
123            best_match_in_new = index_in_new - new_match_length + 1
124            best_match_size = new_match_length
125          end
126        end
127        match_length_at = new_match_length_at
128      end
129
130#      best_match_in_old, best_match_in_new, best_match_size = add_matching_words_left(
131#          best_match_in_old, best_match_in_new, best_match_size, start_in_old, start_in_new)
132#      best_match_in_old, best_match_in_new, match_size = add_matching_words_right(
133#          best_match_in_old, best_match_in_new, best_match_size, end_in_old, end_in_new)
134
135      return (best_match_size != 0 ? Match.new(best_match_in_old, best_match_in_new, best_match_size) : nil)
136    end
137
138    def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
139      while match_in_old > start_in_old and
140            match_in_new > start_in_new and
141            @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
142        match_in_old -= 1
143        match_in_new -= 1
144        match_size += 1
145      end
146      [match_in_old, match_in_new, match_size]
147    end
148
149    def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
150      while match_in_old + match_size < end_in_old and
151            match_in_new + match_size < end_in_new and
152            @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
153        match_size += 1
154      end
155      [match_in_old, match_in_new, match_size]
156    end
157
158    def explode(sequence)
159      sequence.is_a?(String) ? sequence.split(//) : sequence
160    end
161
162  end # of class Diff Builder
163
164end
165