#!/usr/bin/python
#
# Modified from the htmldiff script developed by Dominique Hazael-Massieux
# for the http://services.w3.org/htmldiff website. That script did not
# include a copyright statement.

import atexit
import os
import re
import sys
import tempfile
import tidy

from subprocess import Popen, PIPE


def tidyFile(filename):
    """Run *filename* through tidy and return the result as a temp file.

    The document is first parsed with a small base option set, extended
    with the HTML5-oriented options when an HTML5 doctype is found in the
    first 4096 characters.  If that parse reports errors and the HTML5
    options were not already in use, the document is parsed a second time
    with them.  Any errors left on the final parse are written to stderr;
    they do not abort the run.

    Returns the open ``tempfile.NamedTemporaryFile`` holding the tidied
    markup, flushed and rewound to offset 0.  Its ``close`` is registered
    with ``atexit``, so the file stays available (via ``.name``) until
    the interpreter exits.
    """
    # Read the document once; the original re-read it for every parse.
    # The context manager also closes the handle if tidy raises.
    with open(filename, 'r') as ifp:
        content = ifp.read()

    # option for tidy
    options = dict(tidy_mark=0, show_warnings=0, quiet=1,
                   char_encoding='utf8')
    # Only the head of the document is inspected for the doctype.
    html5 = re.search(r"<!doctype\s+html\s*>", content[:4096],
                      re.IGNORECASE)
    html5_options = {'add_xml_space': 'no',
                     'output_xhtml': 'no',
                     'tidy_mark': 'no',
                     'new_blocklevel_tags': 'article,aside,canvas,dialog,details,figcaption,figure,footer,header,hgroup,menu,nav,section,main,summary,math,semantics,mrow,mfenced,mtable,mtr,mtd,mi,mn,msub,mo,mfrac,munderover,mtext,svg,g,image,rect,text,desc,line,path,polygon,ellipse,tspan,defs,feoffset,fecolormatrix,filter,fegaussianblur,feblend,marker,circle',
                     'new_inline_tags': 'video,audio,canvas,ruby,rt,rp,time,meter,progress,track,source,emu-val,emu-nt,emu-t,mark',
                     'break_before_br': 'no',
                     'vertical_space': 'no',
                     'enclose_text': 'no',
                     'numeric_entities': 'yes',
                     'wrap': '1000',
                     'wrap_attributes': 'no',
                     'drop_empty_paras': 'no'
                     }
    if html5:
        options.update(html5_options)
    newtidy = tidy.parseString(content, **options)
    # Retry with the HTML5 options when the first pass reported errors
    # and those options were not already applied.
    if newtidy.errors and not html5:
        options.update(html5_options)
        newtidy = tidy.parseString(content, **options)

    fp = tempfile.NamedTemporaryFile(
        mode='w+', prefix='htmldiff-', suffix='.html')
    # Closing is deferred to interpreter exit because the caller hands
    # fp.name to an external program.
    atexit.register(fp.close)
    fp.write(str(newtidy))
    fp.flush()
    fp.seek(0)

    # sys.stderr.write('tidyFile: tempfile name %s\n' % fp.name)

    if newtidy.errors:
        sys.stderr.write('tidyFile: tidy.parseString error: %s\n'
                         % str(newtidy.errors))
    return fp


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.stderr.write('tidy: need args file1 file2\n')
        sys.exit(1)

    refdoc = tidyFile(sys.argv[1])
    newdoc = tidyFile(sys.argv[2])

    # htmldiff.pl is looked up in the same directory as this script.
    scriptdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    perlscript = os.path.join(scriptdir, 'htmldiff.pl')

    p = Popen([perlscript, refdoc.name, newdoc.name],
              stdin=PIPE, stdout=PIPE, stderr=PIPE)
    sys.stdout.flush()
    sys.stderr.flush()
    # communicate() closes the child's stdin itself, so no explicit
    # p.stdin.close() is needed afterwards.
    (out, err) = p.communicate()
    if err:
        # Any output on the child's stderr is treated as failure.
        # On Python 3 the captured output is bytes; decode it so the
        # message is readable instead of a b'...' repr.
        if not isinstance(err, str):
            err = err.decode('utf-8', 'replace')
        # write() accepts exactly one argument, so the details are
        # formatted into the message.
        sys.stderr.write('htmldiff: An error occurred when running '
                         'htmldiff.pl on the documents: %s\n' % err)
        sys.exit(1)
    else:
        # Emit the diff bytes untouched: on Python 3 print(out) would
        # show the bytes repr.  Python 2's sys.stdout has no .buffer
        # and takes the byte string directly.
        raw_stdout = getattr(sys.stdout, 'buffer', sys.stdout)
        raw_stdout.write(out)
        raw_stdout.write(b'\n')  # print() used to append this newline
        raw_stdout.flush()