• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Modified from the htmldiff script developed by Dominique Hazael-Massieux
4# for the http://services.w3.org/htmldiff website.
5# License information found at https://github.com/w3c/htmldiff-ui/blob/master/LICENSE
6# for "htmldiffy.py".
7#
8# Copyright (c) 2008-2020 w3c
9# Copyright 2016-2024 The Khronos Group Inc.
10# SPDX-License-Identifier: MIT
11#
12# Permission is hereby granted, free of charge, to any person obtaining a copy
13# of this software and associated documentation files (the "Software"), to deal
14# in the Software without restriction, including without limitation the rights
15# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16# copies of the Software, and to permit persons to whom the Software is
17# furnished to do so, subject to the following conditions:
18#
19# The above copyright notice and this permission notice shall be included in all
20# copies or substantial portions of the Software.
21#
22# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28# SOFTWARE.
29
30import atexit
31import os
32import re
33import sys
34import tempfile
35import tidy
36
37from subprocess import Popen, PIPE
38
def tidyFile(filename):
    """Run HTML Tidy over *filename* and return the result as a temp file.

    The document is parsed with a base set of tidy options. If an HTML5
    doctype (``<!doctype html>``) is found in the first 4096 characters,
    the HTML5 option set is added before parsing. If a document without
    that doctype produces tidy errors, it is parsed a second time with the
    HTML5 options added.

    Args:
        filename: Path of the HTML file to tidy.

    Returns:
        An open, text-mode ``tempfile.NamedTemporaryFile`` containing the
        tidied markup, positioned at the start. Its ``close`` method is
        registered with ``atexit``, so the file stays available (via its
        ``name``) until the interpreter exits.

    Any errors tidy still reports after parsing are written to stderr;
    they are not treated as fatal.
    """
    # Read the input once. The context manager guarantees the handle is
    # closed even if tidy raises, and the text is reused for every parse
    # instead of seeking back and re-reading the file.
    with open(filename, 'r') as ifp:
        content = ifp.read()

    # option for tidy
    options = dict(tidy_mark=0, show_warnings=0, quiet=1, char_encoding='utf8')
    html5 = re.search(r"<!doctype\s+html\s*>", content[:4096],
                      re.IGNORECASE)
    html5_options = {'add_xml_space': 'no',
                     'output_xhtml': 'no',
                     'tidy_mark': 'no',
                     'new_blocklevel_tags': 'article,aside,canvas,dialog,details,figcaption,figure,footer,header,hgroup,menu,nav,section,main,summary,math,semantics,mrow,mfenced,mtable,mtr,mtd,mi,mn,msub,mo,mfrac,munderover,mtext,svg,g,image,rect,text,desc,line,path,polygon,ellipse,tspan,defs,feoffset,fecolormatrix,filter,fegaussianblur,feblend,marker,circle',
                     'new_inline_tags': 'video,audio,canvas,ruby,rt,rp,time,meter,progress,track,source,emu-val,emu-nt,emu-t,mark',
                     'break_before_br': 'no',
                     'vertical_space': 'no',
                     'enclose_text': 'no',
                     'numeric_entities': 'yes',
                     'wrap': '1000',
                     'wrap_attributes': 'no',
                     'drop_empty_paras': 'no'
                     }
    if html5:
        options.update(html5_options)
    newtidy = tidy.parseString(content, **options)

    # A document without the HTML5 doctype that fails to parse cleanly gets
    # one more attempt with the HTML5 options enabled.
    if newtidy.errors and not html5:
        options.update(html5_options)
        newtidy = tidy.parseString(content, **options)

    fp = tempfile.NamedTemporaryFile(
           mode='w+', prefix='htmldiff-', suffix='.html')
    # Keep the temp file alive until the interpreter exits so that other
    # programs can still open it by name.
    atexit.register(fp.close)
    fp.write(str(newtidy))
    fp.flush()
    fp.seek(0)

    # sys.stderr.write('tidyFile: tempfile name %s\n' % fp.name)

    if newtidy.errors:
        sys.stderr.write('tidyFile: tidy.parseString error: %s\n' % str(newtidy.errors))
    return fp
82
def call_perl(args):
    """Run ``htmldiff.pl`` with *args*, print its output, and exit.

    The Perl script is looked up in the same directory as the running
    script (derived from ``sys.argv[0]``). Its standard output is always
    printed. This function never returns: it terminates the process.

    Args:
        args: Iterable of command-line arguments appended after the
            script path.

    Raises:
        SystemExit: Always. The code is 1 if the Perl script wrote
            anything to stderr or finished with a non-zero exit status,
            otherwise 0.
    """
    scriptdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    perlscript = os.path.join(scriptdir, 'htmldiff.pl')
    cmd = [perlscript, *args]

    p = Popen(cmd,
              text=True,
              stdin=PIPE, stdout=PIPE, stderr=PIPE)
    sys.stdout.flush()
    sys.stderr.flush()
    # communicate() closes the child's stdin and waits for it to finish, so
    # no separate stdin.close() is needed afterwards.
    (out, err) = p.communicate()

    print(out)

    # Treat a non-zero exit status as a failure as well: a script that dies
    # without writing to stderr must not be reported as success.
    if err or p.returncode != 0:
        detail = err if err else 'exit status %d' % p.returncode
        sys.stderr.write('htmldiff: An error occurred when running htmldiff.pl on the documents: %s\n' % str(detail))
        sys.exit(1)
    sys.exit(0)
103
def usage():
    """Write the command-line help text to stderr and exit with status 1."""
    # The '-c' flag is not listed here: what it does was never fully
    # investigated (possibly related to mhtml comments?).
    help_lines = (
        "htmldiff: need two filename args file1 file2",
        "",
        "May also pass arguments:",
        "    -l  Make diff highlights links that jump to the following diff",
        "    -t  Add a script to optionally hide old text via button",
        "    -o  Complete omit old text",
        "    -h  show this text",
    )
    sys.stderr.write("\n".join(help_lines) + "\n")
    sys.exit(1)
115
if __name__ == '__main__':
    # Flags forwarded unchanged to htmldiff.pl.
    perl_flags = ('-c', '-l', '-t', '-o')
    cli_args = sys.argv[1:]

    # '-h' anywhere on the command line prints the help text and exits.
    if '-h' in cli_args:
        usage()

    # Split the command line into forwarded flags and document paths,
    # keeping the order in which each was given.
    passthru_args = [arg for arg in cli_args if arg in perl_flags]
    docs = [arg for arg in cli_args if arg not in perl_flags]

    # Exactly two documents (reference, then new) are required.
    if len(docs) != 2:
        usage()

    # Tidy both inputs; the returned temp files are handed to the Perl
    # script by name, reference document first.
    tidied = [tidyFile(path) for path in docs]
    passthru_args.extend(tmp.name for tmp in tidied)
    call_perl(passthru_args)
136