• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2##########################################################################
3#
4# Copyright 2011 Jose Fonseca
5# All Rights Reserved.
6#
7# Permission is hereby granted, free of charge, to any person obtaining a copy
8# of this software and associated documentation files (the "Software"), to deal
9# in the Software without restriction, including without limitation the rights
10# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11# copies of the Software, and to permit persons to whom the Software is
12# furnished to do so, subject to the following conditions:
13#
14# The above copyright notice and this permission notice shall be included in
15# all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23# THE SOFTWARE.
24#
25##########################################################################/
26
27
28import json
29import argparse
30import re
31import difflib
32import sys
33
34
def strip_object_hook(obj):
    """``json`` object hook that drops image objects and dunder members.

    Returns None for any decoded object that has a ``__class__`` member.
    Otherwise removes, in place, every member whose name both starts and
    ends with a double underscore, and returns the same dict.
    """
    if '__class__' in obj:
        return None
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating its live key view raises RuntimeError on Python 3.
    for name in list(obj):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj
42
43
class Visitor:
    """Base class that dispatches on the kind of a decoded JSON node.

    ``visit`` forwards dicts to ``visitObject``, lists to ``visitArray`` and
    everything else to ``visitValue``, passing any extra arguments through.
    The default handlers do nothing and return None.
    """

    def visit(self, node, *args, **kwargs):
        # Pick the handler first, then make a single forwarding call.
        if isinstance(node, dict):
            handler = self.visitObject
        elif isinstance(node, list):
            handler = self.visitArray
        else:
            handler = self.visitValue
        return handler(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        return None

    def visitArray(self, node, *args, **kwargs):
        return None

    def visitValue(self, node, *args, **kwargs):
        return None
62
63
class Dumper(Visitor):
    """Writes a decoded JSON tree to a stream in an indented, JSON-like form.

    Object members are written in sorted key order as ``name: value`` (the
    name is not quoted); scalar values are written with ``json.dumps``.
    Nesting is indented by two spaces per level.
    """

    def __init__(self, stream = sys.stdout):
        self.stream = stream
        self.level = 0

    # -- low-level output helpers -----------------------------------------

    def _write(self, s):
        self.stream.write(s)

    def _indent(self):
        self._write(self.level * '  ')

    def _newline(self):
        self._write('\n')

    # -- objects -----------------------------------------------------------

    def visitObject(self, node):
        self.enter_object()

        names = sorted(node)
        final = len(names) - 1
        for position, name in enumerate(names):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(position == final)

        self.leave_object()

    def enter_object(self):
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        # Every member but the last is followed by a separating comma.
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        self.level -= 1
        self._indent()
        self._write('}')
        # Only an object closed at the outermost level ends the line.
        if self.level <= 0:
            self._newline()

    # -- arrays ------------------------------------------------------------

    def visitArray(self, node):
        self.enter_array()

        final = len(node) - 1
        for position, element in enumerate(node):
            self._indent()
            self.visit(element)
            if position != final:
                self._write(',')
            self._newline()

        self.leave_array()

    def enter_array(self):
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        self.level -= 1
        self._indent()
        self._write(']')

    # -- scalars -----------------------------------------------------------

    def visitValue(self, node):
        self._write(json.dumps(node, allow_nan=True))
135
136
137
class Comparer(Visitor):
    """Decides whether two decoded JSON trees are equivalent.

    ``visit(a, b)`` returns True when ``b`` matches the reference ``a``.

    ignore_added -- when true, members present in ``b`` but not in ``a`` are
                    not treated as a difference.
    tolerance    -- relative tolerance used when both values are floats
                    (absolute tolerance when the reference is zero).
    """

    def __init__(self, ignore_added = False, tolerance = 2.0 ** -24):
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        if not isinstance(b, dict):
            return False
        # Unless additions are ignored, both objects need the same key set.
        if not self.ignore_added and a.keys() != b.keys():
            return False
        for name, ae in a.items():
            # Reachable only with ignore_added: a member missing from b.
            if name not in b:
                return False
            if not self.visit(ae, b[name]):
                return False
        return True

    def visitArray(self, a, b):
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        return all(self.visit(ae, be) for ae, be in zip(a, b))

    def visitValue(self, a, b):
        if not (isinstance(a, float) and isinstance(b, float)):
            return a == b

        # Exactly equal floats always match.  This also covers equal
        # infinities, for which the relative error below would be NaN, and
        # the case of a zero tolerance.
        if a == b:
            return True

        # NaN is the only float that is unequal to itself.  Two NaNs are
        # treated as matching; a NaN never matches a number.
        a_is_nan = a != a
        b_is_nan = b != b
        if a_is_nan or b_is_nan:
            return a_is_nan and b_is_nan

        if a == 0:
            return abs(b) < self.tolerance
        return abs((b - a) / a) < self.tolerance
181
182
class Differ(Visitor):
    """Writes the differences between two decoded JSON trees to a stream.

    Equality is decided by a ``Comparer`` and formatting is delegated to a
    ``Dumper``.  Differing scalars are written as ``old -> new``; differing
    multi-line strings are written as a line-by-line ``difflib`` comparison.
    """

    def __init__(self, stream = sys.stdout, ignore_added = False):
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added = ignore_added)

    def visit(self, a, b):
        # Nothing is written for subtrees the comparer considers equal.
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        names = set(a)
        if not self.comparer.ignore_added:
            names.update(b)

        # Work out which members differ before writing anything, so the
        # separating comma is omitted after the last member actually
        # written rather than after the last member of the object.
        # A member missing on one side is compared as None.
        changed = [
            name for name in sorted(names)
            if not self.comparer.visit(a.get(name), b.get(name))
        ]

        self.dumper.enter_object()
        final = len(changed) - 1
        for position, name in enumerate(changed):
            self.dumper.enter_member(name)
            self.visit(a.get(name), b.get(name))
            self.dumper.leave_member(position == final)
        self.dumper.leave_object()

    def visitArray(self, a, b):
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        max_len = max(len(a), len(b))
        for i in range(max_len):
            # Elements past the end of the shorter array are taken as None.
            ae = a[i] if i < len(a) else None
            be = b[i] if i < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                # Unchanged elements are still written, so positions stay
                # recognisable.
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()
        self.dumper.leave_array()

    def visitValue(self, a, b):
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        """Write the change from ``a`` to ``b``."""
        both_strings = isinstance(a, str) and isinstance(b, str)
        if both_strings and ('\n' in a or '\n' in b):
            # Multi-line strings: one output line per difflib entry.
            entries = difflib.Differ().compare(a.splitlines(), b.splitlines())
            self.dumper.level += 1
            for entry in entries:
                self.dumper._newline()
                self.dumper._indent()
                tag = entry[:2]
                text = entry[2:]
                if tag == '? ':
                    # difflib hint line: written unquoted, with one space
                    # standing in for the opening quote of the other lines.
                    tag = '  '
                    prefix = ' '
                    text = text.rstrip()
                    suffix = ''
                else:
                    prefix = '"'
                    suffix = '\\n"'
                self.dumper._write(tag + prefix + text + suffix)
            self.dumper.level -= 1
            return

        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)

    def isMultilineString(self, value):
        """Return True when ``value`` is a str containing a newline."""
        return isinstance(value, str) and '\n' in value

    def replaceMultilineString(self, a, b):
        """Write ``a -> b`` using the dumper's plain value formatting."""
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)
282
283
#
# Unfortunately the JSON standard does not include comments, but they are
# quite a useful feature to have in regression tests.
#
288
# Token patterns, in alternation order.  Each one becomes its own capture
# group in the combined expression, so group 1 is set for a comment match.
_token_res = [
    r'//[^\r\n]*', # comment
    r'"[^"\\]*(\\.[^"\\]*)*"', # string
]

_tokens_re = re.compile(
    '|'.join('(%s)' % token_re for token_re in _token_res),
    re.DOTALL,
)
295
296
def _strip_comment(mo):
    '''Substitution callback: drop the match when its first group matched,
    otherwise return the matched text unchanged.'''
    return '' if mo.group(1) else mo.group(0)
302
303
def _strip_comments(data):
    '''Return data with the (non-standard) JSON comments removed.'''
    stripped = _tokens_re.sub(_strip_comment, data)
    return stripped
307
308
# Import-time self-check: a comment is removed, while comment-like text
# inside a string literal (even one spanning lines) is left alone.
assert _strip_comments(
    '// a comment\n"// a comment in a string\n"'
) == '\n"// a comment in a string\n"'
314
315
def load(stream, strip_images = True, strip_comments = True):
    '''Decode a JSON document from a readable text stream.

    strip_images   -- when true, decoded objects go through strip_object_hook.
    strip_comments -- when true, the text goes through _strip_comments before
                      it is decoded.

    Decoding uses strict=False in both cases.
    '''
    object_hook = strip_object_hook if strip_images else None
    if not strip_comments:
        return json.load(stream, strict=False, object_hook = object_hook)
    text = _strip_comments(stream.read())
    return json.loads(text, strict=False, object_hook = object_hook)
327
328
def main():
    """Command-line entry point.

    Parses the arguments, loads the reference and source state files and
    writes their differences to standard output.
    """
    optparser = argparse.ArgumentParser(
        description="Diff JSON format state dump files")
    optparser.add_argument("-k", "--keep-images",
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    optparser.add_argument("ref_json", action="store",
        type=str, help="reference state file")
    optparser.add_argument("src_json", action="store",
        type=str, help="source state file")

    args = optparser.parse_args()

    # Close each input as soon as it has been read instead of leaving the
    # file objects open until they are garbage-collected.
    with open(args.ref_json, 'rt') as ref_stream:
        a = load(ref_stream, args.strip_images)
    with open(args.src_json, 'rt') as src_stream:
        b = load(src_stream, args.strip_images)

    differ = Differ()
    differ.visit(a, b)
352
353
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
356