#!/usr/bin/env python3
##########################################################################
#
# Copyright 2011 Jose Fonseca
# All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
##########################################################################/

"""Diff JSON format state dump files.

Two JSON documents are loaded (optionally stripping ``//`` comments and
``__dunder__`` metadata members) and the members that differ are written
out in a compact ``old -> new`` notation.
"""


import argparse
import difflib
import json
import math
import re
import sys


def strip_object_hook(obj):
    """``json`` object hook that strips metadata from a decoded object.

    An object that has a ``__class__`` member is replaced by ``None``.  For
    any other object, every member whose name both starts and ends with a
    double underscore is removed in place.

    Returns the (mutated) ``obj``, or ``None``.
    """
    if '__class__' in obj:
        return None
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating its live key view raises RuntimeError on Python 3.
    for name in list(obj):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj


class Visitor:
    """Dispatch on the JSON type of a node: object, array or plain value."""

    def visit(self, node, *args, **kwargs):
        """Call the ``visit*`` method matching the type of ``node``.

        ``dict`` goes to ``visitObject``, ``list`` to ``visitArray`` and
        everything else to ``visitValue``; extra arguments are forwarded
        and the handler's result is returned.
        """
        if isinstance(node, dict):
            handler = self.visitObject
        elif isinstance(node, list):
            handler = self.visitArray
        else:
            handler = self.visitValue
        return handler(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        """Handle a ``dict`` node.  Does nothing by default."""

    def visitArray(self, node, *args, **kwargs):
        """Handle a ``list`` node.  Does nothing by default."""

    def visitValue(self, node, *args, **kwargs):
        """Handle any other node.  Does nothing by default."""


class Dumper(Visitor):
    """Pretty-print a decoded JSON tree to a text stream."""

    def __init__(self, stream=None):
        """Write to ``stream``; ``None`` means the current ``sys.stdout``."""
        # Resolve stdout per instance rather than at definition time, so a
        # later redirection of sys.stdout is honoured.
        self.stream = sys.stdout if stream is None else stream
        # Current nesting depth, used by _indent().
        self.level = 0

    def _write(self, s):
        self.stream.write(s)

    def _indent(self):
        self._write(' ' * self.level)

    def _newline(self):
        self._write('\n')

    def visitObject(self, node):
        """Write an object with its members sorted by name."""
        self.enter_object()
        members = sorted(node)
        last_index = len(members) - 1
        for index, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(index == last_index)
        self.leave_object()

    def enter_object(self):
        """Open an object and increase the nesting level."""
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        """Write the indented ``name: `` prefix of a member."""
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        """Terminate a member; a comma is written unless ``last`` is true."""
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        """Close an object; a top-level object is followed by a newline."""
        self.level -= 1
        self._indent()
        self._write('}')
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        """Write an array, one element per line."""
        self.enter_array()
        last_index = len(node) - 1
        for index, value in enumerate(node):
            self._indent()
            self.visit(value)
            if index != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        """Open an array and increase the nesting level."""
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        """Close an array and restore the nesting level."""
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        """Write a plain value using its JSON encoding."""
        self._write(json.dumps(node, allow_nan=True))


class Comparer(Visitor):
    """Decide whether two decoded JSON trees are equivalent.

    ``visit(a, b)`` returns ``True`` when ``b`` matches ``a``.
    """

    def __init__(self, ignore_added=False, tolerance=2.0 ** -24):
        """Configure the comparison.

        ignore_added -- when true, object members present only in ``b``
                        do not make the objects differ.
        tolerance    -- bound on the relative error allowed between two
                        floats (absolute error when ``a`` is zero).
        """
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        """Compare object ``a`` against ``b`` member by member."""
        if not isinstance(b, dict):
            return False
        # Unless additions are ignored both sides need the very same names.
        if not self.ignore_added and a.keys() != b.keys():
            return False
        for name in sorted(a):
            if name not in b:
                return False
            if not self.visit(a[name], b[name]):
                return False
        return True

    def visitArray(self, a, b):
        """Compare array ``a`` against ``b`` element by element."""
        if not isinstance(b, list) or len(a) != len(b):
            return False
        return all(self.visit(ae, be) for ae, be in zip(a, b))

    def visitValue(self, a, b):
        """Compare two plain values; floats are compared within tolerance."""
        if not (isinstance(a, float) and isinstance(b, float)):
            return a == b
        if a == b:
            # Covers equal infinities, for which the relative error below
            # would evaluate to NaN and report a spurious difference.
            return True
        if math.isnan(a) or math.isnan(b):
            # NaN never compares equal to itself; two NaNs in otherwise
            # identical dumps must not be reported as a change.
            return math.isnan(a) and math.isnan(b)
        if a == 0:
            return abs(b) < self.tolerance
        return abs((b - a) / a) < self.tolerance


class Differ(Visitor):
    """Write the differences between two decoded JSON trees.

    Equivalence is decided by a ``Comparer``; output goes through a
    ``Dumper``.
    """

    def __init__(self, stream=None, ignore_added=False):
        """Write to ``stream`` (``None`` means the current ``sys.stdout``).

        ``ignore_added`` is forwarded to the ``Comparer``.
        """
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added=ignore_added)

    def visit(self, a, b):
        """Write the difference between ``a`` and ``b``, if there is one."""
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        """Write the members of object ``a`` that differ in ``b``.

        A member that is absent on one side is compared as ``None``.
        """
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        names = set(a)
        if not self.comparer.ignore_added:
            names.update(b)

        # Work out the differing members first, so that "last member" is
        # judged against what is actually printed; judging it against all
        # names left a trailing comma whenever the final names were equal.
        changed = [
            name for name in sorted(names)
            if not self.comparer.visit(a.get(name), b.get(name))
        ]

        self.dumper.enter_object()
        last_index = len(changed) - 1
        for index, name in enumerate(changed):
            self.dumper.enter_member(name)
            self.visit(a.get(name), b.get(name))
            self.dumper.leave_member(index == last_index)
        self.dumper.leave_object()

    def visitArray(self, a, b):
        """Write array ``a`` with the elements that differ in ``b`` marked.

        Every position is written: equal elements are dumped as they are,
        differing ones as a nested difference.  Positions past the end of
        the shorter array are compared as ``None``.
        """
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        count = max(len(a), len(b))
        for index in range(count):
            ae = a[index] if index < len(a) else None
            be = b[index] if index < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if index != count - 1:
                self.dumper._write(',')
            self.dumper._newline()
        self.dumper.leave_array()

    def visitValue(self, a, b):
        """Write ``a -> b`` when the two plain values are not equal."""
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        """Write the replacement of ``a`` by ``b``.

        When both are strings and either contains a newline, a line by
        line ``difflib`` comparison is written; otherwise ``a -> b``.
        """
        both_strings = isinstance(a, str) and isinstance(b, str)
        if both_strings and ('\n' in a or '\n' in b):
            entries = difflib.Differ().compare(a.splitlines(), b.splitlines())
            self.dumper.level += 1
            for entry in entries:
                self.dumper._newline()
                self.dumper._indent()
                tag, text = entry[:2], entry[2:]
                if tag == '? ':
                    # Hint line from difflib: blank out the two-character
                    # tag and put a space where the other lines have their
                    # opening quote, so the markers stay under the text
                    # they point at.
                    tag = '  '
                    prefix = ' '
                    text = text.rstrip()
                    suffix = ''
                else:
                    prefix = '"'
                    suffix = '\\n"'
                self.dumper._write(tag + prefix + text + suffix)
            self.dumper.level -= 1
            return

        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)

    def isMultilineString(self, value):
        """Return whether ``value`` is a string containing a newline."""
        return isinstance(value, str) and '\n' in value

    def replaceMultilineString(self, a, b):
        """Write ``a -> b`` using the plain dump of both values."""
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)


#
# Unfortunately the JSON standard does not include comments, but they are
# quite a useful feature to have in regression tests.
#

_token_res = [
    r'//[^\r\n]*',  # comment
    r'"[^"\\]*(\\.[^"\\]*)*"',  # string
]

# Strings are tokenized too, so that a '//' inside a string literal is not
# mistaken for the start of a comment.
_tokens_re = re.compile(
    r'|'.join(['(' + token_re + ')' for token_re in _token_res]),
    re.DOTALL,
)


def _strip_comment(mo):
    """Substitution callback: drop comment tokens, keep anything else."""
    # Group 1 is the comment alternative of _tokens_re.
    if mo.group(1):
        return ''
    return mo.group(0)


def _strip_comments(data):
    '''Strip (non-standard) JSON comments.'''
    return _tokens_re.sub(_strip_comment, data)


# Import-time self-check: a comment is removed, a '//' in a string is kept.
assert _strip_comments('''// a comment
"// a comment in a string
"''') == '''
"// a comment in a string
"'''


def load(stream, strip_images=True, strip_comments=True):
    """Decode the JSON document read from the text stream ``stream``.

    strip_images   -- when true, decode with ``strip_object_hook``.
    strip_comments -- when true, remove ``//`` comments before decoding.

    Returns the decoded document.
    """
    object_hook = strip_object_hook if strip_images else None
    if not strip_comments:
        return json.load(stream, strict=False, object_hook=object_hook)
    data = _strip_comments(stream.read())
    return json.loads(data, strict=False, object_hook=object_hook)


def main(argv=None):
    """Command line entry point.

    ``argv`` is the list of arguments to parse; ``None`` makes ``argparse``
    use ``sys.argv[1:]``.
    """
    optparser = argparse.ArgumentParser(
        description="Diff JSON format state dump files")
    optparser.add_argument("-k", "--keep-images",
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    optparser.add_argument("ref_json", action="store",
        type=str, help="reference state file")
    optparser.add_argument("src_json", action="store",
        type=str, help="source state file")

    args = optparser.parse_args(argv)

    # Close the input files as soon as they have been decoded.
    with open(args.ref_json, 'rt') as ref_stream:
        a = load(ref_stream, args.strip_images)
    with open(args.src_json, 'rt') as src_stream:
        b = load(src_stream, args.strip_images)

    differ = Differ()
    differ.visit(a, b)


if __name__ == '__main__':
    main()