#!/usr/bin/env python3
#
# Modified from the htmldiff script developed by Dominique Hazael-Massieux
# for the http://services.w3.org/htmldiff website.
# License information found at https://github.com/w3c/htmldiff-ui/blob/master/LICENSE
# for "htmldiffy.py".
#
# Copyright (c) 2008-2020 w3c
# Copyright 2016-2024 The Khronos Group Inc.
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import atexit
import os
import re
import sys
import tempfile

import tidy

from subprocess import Popen, PIPE

# Extra tidy options layered on top of the base options when the input is
# (or is retried as) HTML5: they declare the HTML5/MathML/SVG element names
# as known tags and switch off tidy's reformatting of the markup.
_HTML5_OPTIONS = {
    'add_xml_space': 'no',
    'output_xhtml': 'no',
    'tidy_mark': 'no',
    'new_blocklevel_tags': 'article,aside,canvas,dialog,details,figcaption,figure,footer,header,hgroup,menu,nav,section,main,summary,math,semantics,mrow,mfenced,mtable,mtr,mtd,mi,mn,msub,mo,mfrac,munderover,mtext,svg,g,image,rect,text,desc,line,path,polygon,ellipse,tspan,defs,feoffset,fecolormatrix,filter,fegaussianblur,feblend,marker,circle',
    'new_inline_tags': 'video,audio,canvas,ruby,rt,rp,time,meter,progress,track,source,emu-val,emu-nt,emu-t,mark',
    'break_before_br': 'no',
    'vertical_space': 'no',
    'enclose_text': 'no',
    'numeric_entities': 'yes',
    'wrap': '1000',
    'wrap_attributes': 'no',
    'drop_empty_paras': 'no',
}


def tidyFile(filename):
    """Run tidy over *filename* and return the result as a temporary file.

    The first 4096 characters are searched for an HTML5 doctype; if one is
    found the HTML5 option set is used from the start. Otherwise the file
    is parsed with the base options and, if tidy reports errors, parsed a
    second time with the HTML5 options added.

    Any errors left after the final parse are reported on stderr; the
    (possibly imperfect) output is still written and returned.

    Returns:
        An open ``tempfile.NamedTemporaryFile`` positioned at offset 0 that
        holds the tidied document. Its ``close`` is registered with
        ``atexit``, so the file stays available (via ``.name``) until the
        interpreter exits.
    """
    # tidy is told the document is UTF-8 (char_encoding below), so decode
    # the input the same way instead of relying on the locale default.
    # Reading once up front also lets the retry reuse the text and
    # guarantees the handle is closed even if tidy raises.
    with open(filename, 'r', encoding='utf-8') as ifp:
        content = ifp.read()

    # option for tidy
    options = dict(tidy_mark=0, show_warnings=0, quiet=1,
                   char_encoding='utf8')
    html5 = re.search(r"<!doctype\s+html\s*>", content[:4096],
                      re.IGNORECASE)
    if html5:
        options.update(_HTML5_OPTIONS)
    newtidy = tidy.parseString(content, **options)
    if newtidy.errors and not html5:
        # The plain parse failed: retry treating the document as HTML5.
        options.update(_HTML5_OPTIONS)
        newtidy = tidy.parseString(content, **options)

    # Written as UTF-8 to match the encoding tidy was asked to produce.
    fp = tempfile.NamedTemporaryFile(
        mode='w+', prefix='htmldiff-', suffix='.html', encoding='utf-8')
    atexit.register(fp.close)
    fp.write(str(newtidy))
    fp.flush()
    fp.seek(0)

    # sys.stderr.write('tidyFile: tempfile name %s\n' % fp.name)

    if newtidy.errors:
        sys.stderr.write('tidyFile: tidy.parseString error: %s\n' % str(newtidy.errors))
    return fp


def call_perl(args):
    """Run ``htmldiff.pl`` with *args*, print its output, and exit.

    The perl script is looked up in the directory containing the running
    script (``sys.argv[0]``) and executed directly. Its stdout is always
    printed. If it wrote anything to stderr, that text is reported and the
    process exits with status 1; otherwise it exits with status 0.

    This function never returns: it always ends in ``sys.exit``.
    """
    scriptdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    perlscript = os.path.join(scriptdir, 'htmldiff.pl')
    cmd = [perlscript, *args]

    # The context manager closes the pipes and reaps the child;
    # communicate() itself closes stdin once it has nothing to send.
    with Popen(cmd, text=True, stdin=PIPE, stdout=PIPE, stderr=PIPE) as p:
        sys.stdout.flush()
        sys.stderr.flush()
        out, err = p.communicate()

    print(out)
    if err:
        sys.stderr.write('htmldiff: An error occurred when running htmldiff.pl on the documents: %s\n'% str(err))
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)
    sys.exit(0)


def usage():
    """Write the command-line help to stderr and exit with status 1."""
    # did not investigate fully what -c does - something about mhtml comments?
    sys.stderr.write("""htmldiff: need two filename args file1 file2

May also pass arguments:
    -l Make diff highlights links that jump to the following diff
    -t Add a script to optionally hide old text via button
    -o Complete omit old text
    -h show this text
""")
    sys.exit(1)


def main():
    """Parse ``sys.argv``, tidy both documents, and diff them via perl."""
    docs = []
    passthru_args = []
    for arg in sys.argv[1:]:
        if arg in ('-c', '-l', '-t', '-o'):
            # Flags understood by htmldiff.pl are forwarded untouched.
            passthru_args.append(arg)
        elif arg == '-h':
            usage()
        else:
            docs.append(arg)

    if len(docs) != 2:
        usage()

    # Keep both temp-file objects referenced until call_perl has run so
    # the files still exist when the perl script opens them by name.
    refdoc = tidyFile(docs[0])
    newdoc = tidyFile(docs[1])
    passthru_args.append(refdoc.name)
    passthru_args.append(newdoc.name)
    call_perl(passthru_args)


if __name__ == '__main__':
    main()