#!/usr/bin/env python
##########################################################################
#
# Copyright 2011 Jose Fonseca
# All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
##########################################################################/

'''Structural diff of two JSON documents.

Numbers are compared with a relative tolerance, ``//`` comments are accepted
in the input, and only the members/elements that differ are printed.
'''


import difflib
import json
import numbers
import optparse
import re
import sys


try:
    _string_types = basestring  # Python 2: covers both str and unicode
except NameError:
    _string_types = str  # Python 3

# NOTE(review): the reviewed copy of this file had its whitespace collapsed,
# so the width of these two literals could not be read off directly.  Two
# characters are used because difflib tags ('- ', '+ ', '? ') are two
# characters wide -- confirm against the pristine source.
_INDENT = '  '
_BLANK_TAG = '  '


def strip_object_hook(obj):
    '''``object_hook`` for the json decoder that drops special members.

    An object carrying a ``__class__`` member is replaced by ``None``
    (presumably these are the embedded image blobs that ``--keep-images``
    refers to -- confirm against the producer of the JSON).  Any other member
    whose name both starts and ends with a double underscore is removed from
    ``obj`` in place.

    Returns ``None`` or the (mutated) ``obj``.
    '''
    if '__class__' in obj:
        return None
    # Snapshot the keys first: deleting from a dict while iterating its live
    # key view raises RuntimeError on Python 3.
    for name in list(obj):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj


class Visitor:
    '''Dispatches on the JSON node type: dict, list or anything else.'''

    def visit(self, node, *args, **kwargs):
        '''Forward ``node`` (and any extra arguments) to the matching hook.'''
        if isinstance(node, dict):
            return self.visitObject(node, *args, **kwargs)
        elif isinstance(node, list):
            return self.visitArray(node, *args, **kwargs)
        else:
            return self.visitValue(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        '''Hook for dict nodes; does nothing by default.'''
        pass

    def visitArray(self, node, *args, **kwargs):
        '''Hook for list nodes; does nothing by default.'''
        pass

    def visitValue(self, node, *args, **kwargs):
        '''Hook for every other node; does nothing by default.'''
        pass


class Dumper(Visitor):
    '''Writes a JSON-like, indented rendering of a node tree to a stream.

    Member names are written unquoted and in sorted order.
    '''

    def __init__(self, stream=None):
        '''Write to ``stream``; ``None`` means the current ``sys.stdout``.'''
        # Resolved here rather than in the signature so that a sys.stdout
        # replaced after import (e.g. output redirection) is honoured.
        self.stream = sys.stdout if stream is None else stream
        self.level = 0

    def _write(self, s):
        self.stream.write(s)

    def _indent(self):
        self._write(_INDENT * self.level)

    def _newline(self):
        self._write('\n')

    def visitObject(self, node):
        '''Dump a dict, one member per line, sorted by member name.'''
        self.enter_object()
        # sorted() instead of keys().sort(): keys() is a view on Python 3.
        members = sorted(node)
        last_index = len(members) - 1
        for i, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(i == last_index)
        self.leave_object()

    def enter_object(self):
        '''Open an object and increase the indentation level.'''
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        '''Write the indentation and the ``name: `` prefix of a member.'''
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        '''Terminate a member line, with a comma unless ``last`` is true.'''
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        '''Close an object; a top-level object also ends the line.'''
        self.level -= 1
        self._indent()
        self._write('}')
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        '''Dump a list, one element per line.'''
        self.enter_array()
        last_index = len(node) - 1
        for i, value in enumerate(node):
            self._indent()
            self.visit(value)
            if i != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        '''Open an array and increase the indentation level.'''
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        '''Close an array (no trailing newline is written).'''
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        '''Dump a scalar using its JSON encoding (NaN/Infinity allowed).'''
        self._write(json.dumps(node, allow_nan=True))


class Comparer(Visitor):
    '''Decides whether two node trees are equal.

    ``ignore_added`` -- when true, members present only in the second
    object are not considered a difference.
    ``tolerance`` -- relative tolerance used when either value is a float.
    '''

    def __init__(self, ignore_added=False, tolerance=2.0 ** -24):
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        '''Return True if dict ``a`` matches ``b`` member by member.'''
        if not isinstance(b, dict):
            return False
        if not self.ignore_added and set(a) != set(b):
            return False
        for name in sorted(a):
            # Only reachable with ignore_added: a member removed from b.
            if name not in b:
                return False
            if not self.visit(a[name], b[name]):
                return False
        return True

    def visitArray(self, a, b):
        '''Return True if list ``a`` matches ``b`` element by element.'''
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        for ae, be in zip(a, b):
            if not self.visit(ae, be):
                return False
        return True

    def visitValue(self, a, b):
        '''Return True if scalars ``a`` and ``b`` are equal.

        Floats are compared with the relative tolerance (absolute when ``a``
        is zero); everything else must compare equal.
        '''
        # Exact equality always counts, which also covers equal infinities
        # (inf - inf would otherwise yield NaN and compare as different).
        if a == b:
            return True
        if not (isinstance(a, float) or isinstance(b, float)):
            return False
        # A float against None or a string (e.g. a member that exists on one
        # side only) is a plain mismatch, not an arithmetic error.
        if not (isinstance(a, numbers.Real) and isinstance(b, numbers.Real)):
            return False
        if a == 0:
            return abs(b) < self.tolerance
        return abs((b - a) / a) < self.tolerance


class Differ(Visitor):
    '''Prints the differences between two node trees.

    Equality is decided by a ``Comparer``; output goes through a ``Dumper``.
    Changed values are shown as ``old -> new``.
    '''

    def __init__(self, stream=None, ignore_added=False):
        '''Write to ``stream``; ``None`` means the current ``sys.stdout``.'''
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added=ignore_added)

    def visit(self, a, b):
        '''Print nothing if ``a`` and ``b`` compare equal, else diff them.'''
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        '''Print only the members of dict ``a`` that differ from ``b``.'''
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a)
        if not self.comparer.ignore_added:
            names.update(b)
        names = sorted(names)
        last_index = len(names) - 1

        for i, name in enumerate(names):
            # A member missing on one side is compared against None.
            ae = a.get(name, None)
            be = b.get(name, None)
            if not self.comparer.visit(ae, be):
                self.dumper.enter_member(name)
                self.visit(ae, be)
                self.dumper.leave_member(i == last_index)

        self.dumper.leave_object()

    def visitArray(self, a, b):
        '''Print every element of list ``a``, diffing those that changed.'''
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        max_len = max(len(a), len(b))
        for i in range(max_len):
            # The shorter list is padded with None.
            ae = a[i] if i < len(a) else None
            be = b[i] if i < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visitValue(self, a, b):
        '''Print a scalar change.'''
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        '''Print ``a -> b``, or a line diff when both are multi-line strings.'''
        both_strings = (isinstance(a, _string_types) and
                        isinstance(b, _string_types))
        if both_strings and ('\n' in a or '\n' in b):
            self._replace_lines(a.splitlines(), b.splitlines())
            return
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)

    def _replace_lines(self, a_lines, b_lines):
        '''Print a difflib line-by-line comparison, one level deeper.'''
        self.dumper.level += 1
        for entry in difflib.Differ().compare(a_lines, b_lines):
            self.dumper._newline()
            self.dumper._indent()
            tag = entry[:2]
            text = entry[2:]
            if tag == '? ':
                # difflib hint line: shown without a tag or quotes, shifted
                # by one column to line up under the quoted text above it.
                tag = _BLANK_TAG
                prefix = ' '
                text = text.rstrip()
                suffix = ''
            else:
                prefix = '"'
                suffix = '\\n"'
            self.dumper._write(tag + prefix + text + suffix)
        self.dumper.level -= 1

    def isMultilineString(self, value):
        '''Return True if ``value`` is a string containing a newline.'''
        return isinstance(value, _string_types) and '\n' in value

    def replaceMultilineString(self, a, b):
        '''Print ``a -> b`` using the plain scalar rendering.'''
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)


#
# Unfortunately the JSON standard does not include comments, but they are
# quite a useful feature to have in regression tests.
#

_token_res = [
    r'//[^\r\n]*',  # comment
    r'"[^"\\]*(\\.[^"\\]*)*"',  # string
]

_tokens_re = re.compile(
    r'|'.join(['(' + token_re + ')' for token_re in _token_res]),
    re.DOTALL)


def _strip_comment(mo):
    '''Replacement callback: erase comments, keep string tokens verbatim.'''
    # Group 1 is the comment alternative; any other match is a string token,
    # matched only so that a '//' inside a string is not taken for a comment.
    if mo.group(1):
        return ''
    else:
        return mo.group(0)


def _strip_comments(data):
    '''Strip (non-standard) JSON comments.'''
    return _tokens_re.sub(_strip_comment, data)


# Import-time self-check of the comment stripper.
assert _strip_comments('''// a comment
"// a comment in a string
"''') == '''
"// a comment in a string
"'''


def load(stream, strip_images=True, strip_comments=True):
    '''Parse a JSON document from the file-like object ``stream``.

    ``strip_images`` -- decode with ``strip_object_hook``.
    ``strip_comments`` -- remove ``//`` comments before parsing.

    Returns the decoded document.  Decoding is non-strict, i.e. control
    characters are allowed inside strings.
    '''
    object_hook = strip_object_hook if strip_images else None
    if not strip_comments:
        return json.load(stream, strict=False, object_hook=object_hook)
    data = _strip_comments(stream.read())
    return json.loads(data, strict=False, object_hook=object_hook)


def _load_file(path, strip_images):
    '''Load the JSON document stored at ``path``, closing the file after.'''
    with open(path, 'rt') as stream:
        return load(stream, strip_images)


def main():
    '''Command line entry point: print the diff of two JSON files.'''
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    # Use the parsed positional arguments, not sys.argv[1]/sys.argv[2]:
    # with an option on the command line those would be the option itself.
    ref_path, src_path = args
    a = _load_file(ref_path, options.strip_images)
    b = _load_file(src_path, options.strip_images)

    differ = Differ()
    differ.visit(a, b)


if __name__ == '__main__':
    main()