• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===-----------------------------------------------------------------------===#
10
11
12from __future__ import print_function
13
14import argparse
15import collections
16import difflib
17import json
18import logging
19import os
20import re
21import sys
22
23
24#===-----------------------------------------------------------------------===#
25# These data structures represent a deserialized ExplodedGraph.
26#===-----------------------------------------------------------------------===#
27
28
29# A helper function for finding the difference between two dictionaries.
def diff_dicts(curr, prev):
    """Report which keys differ between two dictionaries.

    Returns a ``(removed, added)`` tuple of key lists. A key of ``prev`` is
    "removed" when it is missing from ``curr`` or maps to a different value
    there; a key of ``curr`` is "added" when it is missing from ``prev`` or
    maps to a different value there. A key whose value changed therefore
    shows up in both lists.
    """
    removed = []
    for key in prev:
        if key not in curr or curr[key] != prev[key]:
            removed.append(key)

    added = []
    for key in curr:
        if key not in prev or curr[key] != prev[key]:
            added.append(key)

    return (removed, added)
34
35
36# Represents any program state trait that is a dictionary of key-value pairs.
class GenericMap:
    """An ordered key-value program state trait that can be diffed."""

    def __init__(self, items):
        # 'items' is anything OrderedDict accepts, e.g. a list of pairs.
        self.generic_map = collections.OrderedDict(items)

    def diff(self, prev):
        """Return ``(removed, added)`` key lists relative to ``prev``."""
        return diff_dicts(self.generic_map, prev.generic_map)

    def is_different(self, prev):
        """Tell whether any key was removed, added or changed since ``prev``."""
        gone, new = self.diff(prev)
        return bool(gone) or bool(new)
47
48
49# A deserialized source location.
class SourceLocation:
    """A source location read from its JSON dictionary form.

    Reads the mandatory 'line' and 'column' keys plus the optional 'file'
    and 'spelling' keys. A nested 'spelling' entry is itself deserialized
    as a SourceLocation.
    """

    def __init__(self, json_loc):
        logging.debug('json: %s' % json_loc)
        self.line = json_loc['line']
        self.col = json_loc['column']

        # Only the base name of the file is kept; a missing 'file' key is
        # reported as the main file.
        if 'file' in json_loc:
            self.filename = os.path.basename(json_loc['file'])
        else:
            self.filename = '(main file)'

        if 'spelling' in json_loc:
            self.spelling = SourceLocation(json_loc['spelling'])
        else:
            self.spelling = None

    def is_macro(self):
        """Return True when the location carries a spelling location."""
        return self.spelling is not None
62
63
64# A deserialized program point.
class ProgramPoint:
    """A program point read from its JSON dictionary form.

    Every point has 'kind', 'tag', 'node_id', 'is_sink' and 'has_report'.
    'Edge', 'Statement' and 'BlockEntrance' points read additional,
    kind-specific keys; other kinds carry only the common attributes.
    """

    def __init__(self, json_pp):
        kind = json_pp['kind']
        self.kind = kind
        self.tag = json_pp['tag']
        self.node_id = json_pp['node_id']
        self.is_sink = bool(json_pp['is_sink'])
        self.has_report = bool(json_pp['has_report'])

        if kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
            return

        if kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']
            return

        if kind != 'Statement':
            return

        logging.debug(json_pp)
        self.stmt_kind = json_pp['stmt_kind']
        # 'cast_kind' is optional in the JSON.
        self.cast_kind = json_pp.get('cast_kind')
        self.stmt_point_kind = json_pp['stmt_point_kind']
        self.stmt_id = json_pp['stmt_id']
        self.pointer = json_pp['pointer']
        self.pretty = json_pp['pretty']
        location = json_pp['location']
        self.loc = None if location is None else SourceLocation(location)
88
89
90# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey:
    """A single expression acting as a key in a deserialized Environment.

    Two keys are equal, and hash alike, when their 'stmt_id' matches; the
    'pretty' and 'kind' attributes do not take part in the comparison.
    """

    def __init__(self, json_ek):
        # CXXCtorInitializer is not a Stmt!
        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
            else json_ek['init_id']
        self.pretty = json_ek['pretty']
        self.kind = json_ek['kind'] if 'kind' in json_ek else None

    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing on the missing _key() method.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '==', so spell it out.
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __hash__(self):
        return hash(self._key())
107
108
109# Deserialized description of a location context.
class LocationContext:
    """Deserialized description of a location context.

    Reads 'lctx_id', 'location_context', 'calling' and 'location' from the
    JSON frame. Two contexts are equal, and hash alike, when their
    'lctx_id' matches.
    """

    def __init__(self, json_frame):
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        self.loc = SourceLocation(json_frame['location']) \
            if json_frame['location'] is not None else None

    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing on the missing _key() method.
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '==', so spell it out.
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __hash__(self):
        return hash(self._key())
126
127
128# A group of deserialized Environment bindings that correspond to a specific
129# location context.
class EnvironmentFrame:
    """The Environment bindings that belong to one location context.

    The JSON frame supplies both the location context description and an
    'items' list, which may be None; each item becomes one binding keyed
    by an EnvironmentBindingKey and mapped to the item's 'value'.
    """

    def __init__(self, json_frame):
        self.location_context = LocationContext(json_frame)
        items = json_frame['items']
        if items is None:
            pairs = []
        else:
            pairs = [(EnvironmentBindingKey(item), item['value'])
                     for item in items]
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return ``(removed, added)`` binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        gone, new = self.diff_bindings(prev)
        return bool(gone) or bool(new)
144
145
146# A deserialized Environment. This class can also hold other entities that
147# are similar to Environment, such as Objects Under Construction.
class GenericEnvironment:
    """A deserialized Environment: a list of EnvironmentFrame objects.

    Also used for Environment-like entities such as Objects Under
    Construction.
    """

    def __init__(self, json_e):
        self.frames = [EnvironmentFrame(frame) for frame in json_e]

    def diff_frames(self, prev):
        """Return indices of frames whose bindings differ from ``prev``.

        Returns None when a frame-by-frame comparison is not possible:
        the number of frames differs, or a frame at some index has a
        different location context than its counterpart.
        """
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        pairs = zip(self.frames, prev.frames)
        for index, (frame, prev_frame) in enumerate(pairs):
            if not (frame.location_context == prev_frame.location_context):
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(prev_frame):
                updated.append(index)

        # TODO: Add support for added/removed.
        return updated

    def is_different(self, prev):
        """Tell whether anything changed, or the frames cannot be compared."""
        updated = self.diff_frames(prev)
        if updated is None:
            return True
        return len(updated) > 0
175
176
177# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey:
    """A single binding key in a deserialized RegionStore cluster.

    Two keys are equal, and hash alike, when both their 'kind' and their
    'offset' match.
    """

    def __init__(self, json_sk):
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing on the missing _key() method.
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from '==', so spell it out.
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __hash__(self):
        return hash(self._key())
191
192
193# A single cluster of the deserialized RegionStore.
class StoreCluster:
    """One cluster of the deserialized RegionStore.

    Keeps the cluster's base region (the JSON 'cluster' entry) and an
    ordered mapping from StoreBindingKey to the bound 'value'.
    """

    def __init__(self, json_sc):
        self.base_region = json_sc['cluster']
        pairs = [(StoreBindingKey(item), item['value'])
                 for item in json_sc['items']]
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return ``(removed, added)`` binding keys relative to ``prev``."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        gone, new = self.diff_bindings(prev)
        return bool(gone) or bool(new)
206
207
208# A deserialized RegionStore.
class Store:
    """A deserialized RegionStore.

    Keeps the store's 'pointer' and an ordered mapping from each cluster's
    'pointer' to its StoreCluster.
    """

    def __init__(self, json_s):
        self.ptr = json_s['pointer']
        pairs = [(item['pointer'], StoreCluster(item))
                 for item in json_s['items']]
        self.clusters = collections.OrderedDict(pairs)

    def diff_clusters(self, prev):
        """Return ``(removed, added, updated)`` cluster keys vs. ``prev``.

        'updated' lists the keys present in both stores whose clusters
        report a difference.
        """
        mine, theirs = self.clusters, prev.clusters
        removed = [key for key in theirs if key not in mine]
        added = [key for key in mine if key not in theirs]
        updated = []
        for key in theirs:
            if key in mine and theirs[key].is_different(mine[key]):
                updated.append(key)
        return (removed, added, updated)

    def is_different(self, prev):
        """Tell whether any cluster was removed, added or updated."""
        removed, added, updated = self.diff_clusters(prev)
        return bool(removed) or bool(added) or bool(updated)
225
226
227# Deserialized messages from a single checker in a single program state.
228# Basically a list of raw strings.
class CheckerLines:
    """Messages from one checker in one program state, as raw strings."""

    def __init__(self, json_lines):
        self.lines = json_lines

    def diff_lines(self, prev):
        """Return the stripped '+'/'-' lines of an ndiff against ``prev``.

        Lines that difflib.ndiff emits with any other prefix are dropped.
        """
        changed = []
        for entry in difflib.ndiff(prev.lines, self.lines):
            if entry.startswith(('+', '-')):
                changed.append(entry.strip())
        return changed

    def is_different(self, prev):
        """Tell whether the diff against ``prev`` is non-empty."""
        return len(self.diff_lines(prev)) > 0
240
241
242# Deserialized messages of all checkers, separated by checker.
class CheckerMessages:
    """Deserialized messages of all checkers, keyed by checker name."""

    def __init__(self, json_m):
        pairs = [(entry['checker'], CheckerLines(entry['messages']))
                 for entry in json_m]
        self.items = collections.OrderedDict(pairs)

    def diff_messages(self, prev):
        """Return ``(removed, added, updated)`` checker names vs. ``prev``.

        'updated' lists the checkers present in both states whose lines
        report a difference.
        """
        mine, theirs = self.items, prev.items
        removed = [name for name in theirs if name not in mine]
        added = [name for name in mine if name not in theirs]
        updated = []
        for name in theirs:
            if name in mine and theirs[name].is_different(mine[name]):
                updated.append(name)
        return (removed, added, updated)

    def is_different(self, prev):
        """Tell whether any checker was removed, added or updated."""
        removed, added, updated = self.diff_messages(prev)
        return bool(removed) or bool(added) or bool(updated)
258
259
260# A deserialized program state.
class ProgramState:
    """A deserialized program state.

    Each trait ('store', 'environment', 'constraints', 'dynamic_types',
    'constructing_objects', 'checker_messages') is deserialized into its
    own object, or left as None when the JSON value is None. Passing None
    for the whole state yields a state in which every trait is None.
    """

    def __init__(self, state_id, json_ps):
        logging.debug('Adding ProgramState ' + str(state_id))

        if json_ps is None:
            json_ps = dict.fromkeys((
                'store',
                'environment',
                'constraints',
                'dynamic_types',
                'constructing_objects',
                'checker_messages',
            ))

        self.state_id = state_id

        store = json_ps['store']
        self.store = None if store is None else Store(store)

        environment = json_ps['environment']
        if environment is None:
            self.environment = None
        else:
            self.environment = GenericEnvironment(environment['items'])

        constraints = json_ps['constraints']
        if constraints is None:
            self.constraints = None
        else:
            self.constraints = GenericMap(
                [(c['symbol'], c['range']) for c in constraints])

        dynamic_types = json_ps['dynamic_types']
        if dynamic_types is None:
            self.dynamic_types = None
        else:
            pairs = []
            for t in dynamic_types:
                suffix = ' (or a sub-class)' if t['sub_classable'] else ''
                pairs.append((t['region'], '%s%s' % (t['dyn_type'], suffix)))
            self.dynamic_types = GenericMap(pairs)

        constructing = json_ps['constructing_objects']
        self.constructing_objects = \
            None if constructing is None else GenericEnvironment(constructing)

        messages = json_ps['checker_messages']
        self.checker_messages = \
            None if messages is None else CheckerMessages(messages)
301
302
# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and at that moment we already need room for its predecessors and successors.
class ExplodedNode:
    """A deserialized exploded graph node.

    A freshly created node only holds empty predecessor and successor
    lists; construct() fills in the remaining attributes later.
    """

    def __init__(self):
        self.predecessors = []
        self.successors = []

    def construct(self, node_id, json_node):
        """Fill in the node from its .dot name and its JSON contents."""
        logging.debug('Adding ' + node_id)
        # Drop the first four characters of the name (node_name() puts
        # 'Node' back in front).
        self.ptr = node_id[4:]
        self.points = [ProgramPoint(point)
                       for point in json_node['program_points']]
        # The node is identified by the id of its last program point.
        self.node_id = self.points[-1].node_id
        state_id = json_node['state_id']
        self.state = ProgramState(state_id, json_node['program_state'])

        assert self.node_name() == node_id

    def node_name(self):
        """Return the node's name: 'Node' followed by its pointer."""
        return 'Node' + self.ptr
324
325
326# A deserialized ExplodedGraph. Constructed by consuming a .dot file
327# line-by-line.
class ExplodedGraph:
    """A deserialized ExplodedGraph, built by feeding it .dot lines.

    Nodes live in a defaultdict keyed by node name, so that an edge may
    mention a node before the node's own line has been seen.
    """

    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ''

    def add_raw_line(self, raw_line):
        """Consume one line of the .dot file.

        Lines starting with '//' are ignored. A non-empty line that does
        not end in ';' is buffered and prepended to the next line. A
        complete line is then matched as an edge or as a node; anything
        else is skipped.
        """
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if len(raw_line) > 0 and raw_line[-1] != ';':
            self.incomplete_line += raw_line
            return
        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Try the edge pattern first, then the node pattern.
        logging.debug('Line: ' + raw_line)
        edge = self.edge_re.match(raw_line)
        if edge is not None:
            logging.debug('Classified as edge line.')
            pred, succ = edge.group(1), edge.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return

        node = self.node_re.match(raw_line)
        if node is None:
            logging.debug('Skipping.')
            return

        logging.debug('Classified as node line.')
        node_id = node.group(1)
        # The first line that reaches this point with an empty node table
        # names the root.
        if len(self.nodes) == 0:
            self.root_id = node_id

        # Note: when writing tests you don't need to escape everything,
        # even though in a valid dot file everything is escaped.
        # The substitutions are applied strictly in this order.
        node_label = node.group(2)
        for old, new in ((' ', ''),
                         ('\\"', '"'),
                         ('\\{', '{'),
                         ('\\}', '}'),
                         ('\\\\', '\\'),
                         ('\\|', '|'),
                         ('\\<', '\\\\<'),
                         ('\\>', '\\\\>')):
            node_label = node_label.replace(old, new)
        node_label = node_label.rstrip(',')

        # Handle `\l` separately because a string literal in the code can
        # look like "string\\literal", with a `\l` inside it.
        # Also, on Windows the __FILE__ macro produces `\` delimiters, and
        # a directory or file name may start with the letter `l`.
        # Remove every `\l` (as in `,\l`, `}\l`, `[\l`) except `\\l`,
        # because such a literal as a rule contains several `\` before
        # the `\l`.
        node_label = re.sub(r'(?<!\\)\\l', '', node_label)
        logging.debug(node_label)
        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)
392
393
394#===-----------------------------------------------------------------------===#
395# Visitors traverse a deserialized ExplodedGraph and do different things
396# with every node and edge.
397#===-----------------------------------------------------------------------===#
398
399
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
402class DotDumpVisitor:
403    def __init__(self, do_diffs, dark_mode, gray_mode,
404                 topo_mode, dump_dot_only):
405        self._do_diffs = do_diffs
406        self._dark_mode = dark_mode
407        self._gray_mode = gray_mode
408        self._topo_mode = topo_mode
409        self._dump_dot_only = dump_dot_only
410        self._output = []
411
412    def _dump_raw(self, s):
413        if self._dump_dot_only:
414            print(s, end='')
415        else:
416            self._output.append(s)
417
418    def output(self):
419        assert not self._dump_dot_only
420        if sys.version_info[0] > 2 and sys.version_info[1] >= 5:
421            return ''.join(self._output).encode()
422        else:
423            return ''.join(self._output)
424
425    def _dump(self, s):
426        s = s.replace('&', '&amp;') \
427             .replace('{', '\\{') \
428             .replace('}', '\\}') \
429             .replace('\\<', '&lt;') \
430             .replace('\\>', '&gt;') \
431             .replace('|', '\\|')
432        s = re.sub(r'(?<!\\)\\l', '<br />', s)
433        if self._gray_mode:
434            s = re.sub(r'<font color="[a-z0-9]*">', '', s)
435            s = re.sub(r'</font>', '', s)
436        self._dump_raw(s)
437
438    @staticmethod
439    def _diff_plus_minus(is_added):
440        if is_added is None:
441            return ''
442        if is_added:
443            return '<font color="forestgreen">+</font>'
444        return '<font color="red">-</font>'
445
446    @staticmethod
447    def _short_pretty(s):
448        if s is None:
449            return None
450        if len(s) < 20:
451            return s
452        left = s.find('{')
453        right = s.rfind('}')
454        if left == -1 or right == -1 or left >= right:
455            return s
456        candidate = s[0:left + 1] + ' ... ' + s[right:]
457        if len(candidate) >= len(s):
458            return s
459        return candidate
460
461    @staticmethod
462    def _make_sloc(loc):
463        if loc is None:
464            return '<i>Invalid Source Location</i>'
465
466        def make_plain_loc(loc):
467            return '%s:<b>%s</b>:<b>%s</b>' \
468                % (loc.filename, loc.line, loc.col)
469
470        if loc.is_macro():
471            return '%s <font color="royalblue1">' \
472                   '(<i>spelling at </i> %s)</font>' \
473                % (make_plain_loc(loc), make_plain_loc(loc.spelling))
474
475        return make_plain_loc(loc)
476
477    def visit_begin_graph(self, graph):
478        self._graph = graph
479        self._dump_raw('digraph "ExplodedGraph" {\n')
480        if self._dark_mode:
481            self._dump_raw('bgcolor="gray10";\n')
482        self._dump_raw('label="";\n')
483
484    def visit_program_point(self, p):
485        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
486            color = 'gold3'
487        elif p.kind in ['PreStmtPurgeDeadSymbols',
488                        'PostStmtPurgeDeadSymbols']:
489            color = 'red'
490        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
491            color = 'dodgerblue' if self._dark_mode else 'blue'
492        elif p.kind in ['Statement']:
493            color = 'cyan4'
494        else:
495            color = 'forestgreen'
496
497        self._dump('<tr><td align="left">%s.</td>' % p.node_id)
498
499        if p.kind == 'Statement':
500            # This avoids pretty-printing huge statements such as CompoundStmt.
501            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
502            skip_pretty = 'PurgeDeadSymbols' in p.stmt_point_kind
503            stmt_color = 'cyan3'
504            self._dump('<td align="left" width="0">%s:</td>'
505                       '<td align="left" width="0"><font color="%s">'
506                       '%s</font> </td>'
507                       '<td align="left"><i>S%s</i></td>'
508                       '<td align="left"><font color="%s">%s</font></td>'
509                       '<td align="left">%s</td></tr>'
510                       % (self._make_sloc(p.loc), color,
511                          '%s (%s)' % (p.stmt_kind, p.cast_kind)
512                          if p.cast_kind is not None else p.stmt_kind,
513                          p.stmt_id, stmt_color, p.stmt_point_kind,
514                          self._short_pretty(p.pretty)
515                          if not skip_pretty else ''))
516        elif p.kind == 'Edge':
517            self._dump('<td width="0"></td>'
518                       '<td align="left" width="0">'
519                       '<font color="%s">%s</font></td><td align="left">'
520                       '[B%d] -\\> [B%d]</td></tr>'
521                       % (color, 'BlockEdge', p.src_id, p.dst_id))
522        elif p.kind == 'BlockEntrance':
523            self._dump('<td width="0"></td>'
524                       '<td align="left" width="0">'
525                       '<font color="%s">%s</font></td>'
526                       '<td align="left">[B%d]</td></tr>'
527                       % (color, p.kind, p.block_id))
528        else:
529            # TODO: Print more stuff for other kinds of points.
530            self._dump('<td width="0"></td>'
531                       '<td align="left" width="0" colspan="2">'
532                       '<font color="%s">%s</font></td></tr>'
533                       % (color, p.kind))
534
535        if p.tag is not None:
536            self._dump('<tr><td width="0"></td><td width="0"></td>'
537                       '<td colspan="3" align="left">'
538                       '<b>Tag: </b> <font color="crimson">'
539                       '%s</font></td></tr>' % p.tag)
540
541        if p.has_report:
542            self._dump('<tr><td width="0"></td><td width="0"></td>'
543                       '<td colspan="3" align="left">'
544                       '<font color="red"><b>Bug Report Attached'
545                       '</b></font></td></tr>')
546        if p.is_sink:
547            self._dump('<tr><td width="0"></td><td width="0"></td>'
548                       '<td colspan="3" align="left">'
549                       '<font color="cornflowerblue"><b>Sink Node'
550                       '</b></font></td></tr>')
551
552    def visit_environment(self, e, prev_e=None):
553        self._dump('<table border="0">')
554
555        def dump_location_context(lc, is_added=None):
556            self._dump('<tr><td>%s</td>'
557                       '<td align="left"><b>%s</b></td>'
558                       '<td align="left" colspan="2">'
559                       '<font color="gray60">%s </font>'
560                       '%s</td></tr>'
561                       % (self._diff_plus_minus(is_added),
562                          lc.caption, lc.decl,
563                          ('(%s)' % self._make_sloc(lc.loc))
564                          if lc.loc is not None else ''))
565
566        def dump_binding(f, b, is_added=None):
567            self._dump('<tr><td>%s</td>'
568                       '<td align="left"><i>S%s</i></td>'
569                       '%s'
570                       '<td align="left">%s</td>'
571                       '<td align="left">%s</td></tr>'
572                       % (self._diff_plus_minus(is_added),
573                          b.stmt_id,
574                          '<td align="left"><font color="%s"><i>'
575                          '%s</i></font></td>' % (
576                              'lavender' if self._dark_mode else 'darkgreen',
577                              ('(%s)' % b.kind) if b.kind is not None else ' '
578                          ),
579                          self._short_pretty(b.pretty), f.bindings[b]))
580
581        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
582        if frames_updated:
583            for i in frames_updated:
584                f = e.frames[i]
585                prev_f = prev_e.frames[i]
586                dump_location_context(f.location_context)
587                bindings_removed, bindings_added = f.diff_bindings(prev_f)
588                for b in bindings_removed:
589                    dump_binding(prev_f, b, False)
590                for b in bindings_added:
591                    dump_binding(f, b, True)
592        else:
593            for f in e.frames:
594                dump_location_context(f.location_context)
595                for b in f.bindings:
596                    dump_binding(f, b)
597
598        self._dump('</table>')
599
600    def visit_environment_in_state(self, selector, title, s, prev_s=None):
601        e = getattr(s, selector)
602        prev_e = getattr(prev_s, selector) if prev_s is not None else None
603        if e is None and prev_e is None:
604            return
605
606        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
607        if e is None:
608            self._dump('<i> Nothing!</i>')
609        else:
610            if prev_e is not None:
611                if e.is_different(prev_e):
612                    self._dump('</td></tr><tr><td align="left">')
613                    self.visit_environment(e, prev_e)
614                else:
615                    self._dump('<i> No changes!</i>')
616            else:
617                self._dump('</td></tr><tr><td align="left">')
618                self.visit_environment(e)
619
620        self._dump('</td></tr>')
621
622    def visit_store(self, s, prev_s=None):
623        self._dump('<table border="0">')
624
625        def dump_binding(s, c, b, is_added=None):
626            self._dump('<tr><td>%s</td>'
627                       '<td align="left">%s</td>'
628                       '<td align="left">%s</td>'
629                       '<td align="left">%s</td>'
630                       '<td align="left">%s</td></tr>'
631                       % (self._diff_plus_minus(is_added),
632                          s.clusters[c].base_region, b.offset,
633                          '(<i>Default</i>)' if b.kind == 'Default'
634                          else '',
635                          s.clusters[c].bindings[b]))
636
637        if prev_s is not None:
638            clusters_removed, clusters_added, clusters_updated = \
639                s.diff_clusters(prev_s)
640            for c in clusters_removed:
641                for b in prev_s.clusters[c].bindings:
642                    dump_binding(prev_s, c, b, False)
643            for c in clusters_updated:
644                bindings_removed, bindings_added = \
645                    s.clusters[c].diff_bindings(prev_s.clusters[c])
646                for b in bindings_removed:
647                    dump_binding(prev_s, c, b, False)
648                for b in bindings_added:
649                    dump_binding(s, c, b, True)
650            for c in clusters_added:
651                for b in s.clusters[c].bindings:
652                    dump_binding(s, c, b, True)
653        else:
654            for c in s.clusters:
655                for b in s.clusters[c].bindings:
656                    dump_binding(s, c, b)
657
658        self._dump('</table>')
659
660    def visit_store_in_state(self, s, prev_s=None):
661        st = s.store
662        prev_st = prev_s.store if prev_s is not None else None
663        if st is None and prev_st is None:
664            return
665
666        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
667        if st is None:
668            self._dump('<i> Nothing!</i>')
669        else:
670            if self._dark_mode:
671                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
672            else:
673                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
674            if prev_st is not None:
675                if s.store.is_different(prev_st):
676                    self._dump('</td></tr><tr><td align="left">')
677                    self.visit_store(st, prev_st)
678                else:
679                    self._dump('<i> No changes!</i>')
680            else:
681                self._dump('</td></tr><tr><td align="left">')
682                self.visit_store(st)
683        self._dump('</td></tr>')
684
685    def visit_generic_map(self, m, prev_m=None):
686        self._dump('<table border="0">')
687
688        def dump_pair(m, k, is_added=None):
689            self._dump('<tr><td>%s</td>'
690                       '<td align="left">%s</td>'
691                       '<td align="left">%s</td></tr>'
692                       % (self._diff_plus_minus(is_added),
693                          k, m.generic_map[k]))
694
695        if prev_m is not None:
696            removed, added = m.diff(prev_m)
697            for k in removed:
698                dump_pair(prev_m, k, False)
699            for k in added:
700                dump_pair(m, k, True)
701        else:
702            for k in m.generic_map:
703                dump_pair(m, k, None)
704
705        self._dump('</table>')
706
707    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
708        m = getattr(s, selector)
709        prev_m = getattr(prev_s, selector) if prev_s is not None else None
710        if m is None and prev_m is None:
711            return
712
713        self._dump('<hr />')
714        self._dump('<tr><td align="left">'
715                   '<b>%s: </b>' % title)
716        if m is None:
717            self._dump('<i> Nothing!</i>')
718        else:
719            if prev_m is not None:
720                if m.is_different(prev_m):
721                    self._dump('</td></tr><tr><td align="left">')
722                    self.visit_generic_map(m, prev_m)
723                else:
724                    self._dump('<i> No changes!</i>')
725            else:
726                self._dump('</td></tr><tr><td align="left">')
727                self.visit_generic_map(m)
728
729        self._dump('</td></tr>')
730
731    def visit_checker_messages(self, m, prev_m=None):
732        self._dump('<table border="0">')
733
734        def dump_line(l, is_added=None):
735            self._dump('<tr><td>%s</td>'
736                       '<td align="left">%s</td></tr>'
737                       % (self._diff_plus_minus(is_added), l))
738
739        def dump_chk(chk, is_added=None):
740            dump_line('<i>%s</i>:' % chk, is_added)
741
742        if prev_m is not None:
743            removed, added, updated = m.diff_messages(prev_m)
744            for chk in removed:
745                dump_chk(chk, False)
746                for l in prev_m.items[chk].lines:
747                    dump_line(l, False)
748            for chk in updated:
749                dump_chk(chk)
750                for l in m.items[chk].diff_lines(prev_m.items[chk]):
751                    dump_line(l[1:], l.startswith('+'))
752            for chk in added:
753                dump_chk(chk, True)
754                for l in m.items[chk].lines:
755                    dump_line(l, True)
756        else:
757            for chk in m.items:
758                dump_chk(chk)
759                for l in m.items[chk].lines:
760                    dump_line(l)
761
762        self._dump('</table>')
763
764    def visit_checker_messages_in_state(self, s, prev_s=None):
765        m = s.checker_messages
766        prev_m = prev_s.checker_messages if prev_s is not None else None
767        if m is None and prev_m is None:
768            return
769
770        self._dump('<hr />')
771        self._dump('<tr><td align="left">'
772                   '<b>Checker State: </b>')
773        if m is None:
774            self._dump('<i> Nothing!</i>')
775        else:
776            if prev_m is not None:
777                if m.is_different(prev_m):
778                    self._dump('</td></tr><tr><td align="left">')
779                    self.visit_checker_messages(m, prev_m)
780                else:
781                    self._dump('<i> No changes!</i>')
782            else:
783                self._dump('</td></tr><tr><td align="left">')
784                self.visit_checker_messages(m)
785
786        self._dump('</td></tr>')
787
788    def visit_state(self, s, prev_s):
789        self.visit_store_in_state(s, prev_s)
790        self.visit_environment_in_state('environment', 'Expressions',
791                                        s, prev_s)
792        self.visit_generic_map_in_state('constraints', 'Ranges',
793                                        s, prev_s)
794        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
795                                        s, prev_s)
796        self.visit_environment_in_state('constructing_objects',
797                                        'Objects Under Construction',
798                                        s, prev_s)
799        self.visit_checker_messages_in_state(s, prev_s)
800
801    def visit_node(self, node):
802        self._dump('%s [shape=record,'
803                   % (node.node_name()))
804        if self._dark_mode:
805            self._dump('color="white",fontcolor="gray80",')
806        self._dump('label=<<table border="0">')
807
808        self._dump('<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
809                   % ("gray20" if self._dark_mode else "gray70",
810                      node.state.state_id
811                      if node.state is not None else 'Unspecified'))
812        if not self._topo_mode:
813            self._dump('<tr><td align="left" width="0">')
814            if len(node.points) > 1:
815                self._dump('<b>Program points:</b></td></tr>')
816            else:
817                self._dump('<b>Program point:</b></td></tr>')
818        self._dump('<tr><td align="left" width="0">'
819                   '<table border="0" align="left" width="0">')
820        for p in node.points:
821            self.visit_program_point(p)
822        self._dump('</table></td></tr>')
823
824        if node.state is not None and not self._topo_mode:
825            prev_s = None
826            # Do diffs only when we have a unique predecessor.
827            # Don't do diffs on the leaf nodes because they're
828            # the important ones.
829            if self._do_diffs and len(node.predecessors) == 1 \
830               and len(node.successors) > 0:
831                prev_s = self._graph.nodes[node.predecessors[0]].state
832            self.visit_state(node.state, prev_s)
833        self._dump_raw('</table>>];\n')
834
835    def visit_edge(self, pred, succ):
836        self._dump_raw('%s -> %s%s;\n' % (
837            pred.node_name(), succ.node_name(),
838            ' [color="white"]' if self._dark_mode else ''
839        ))
840
841    def visit_end_of_graph(self):
842        self._dump_raw('}\n')
843
844        if not self._dump_dot_only:
845            import sys
846            import tempfile
847
848            def write_temp_file(suffix, data):
849                fd, filename = tempfile.mkstemp(suffix=suffix)
850                print('Writing "%s"...' % filename)
851                with os.fdopen(fd, 'w') as fp:
852                    fp.write(data)
853                print('Done! Please remember to remove the file.')
854                return filename
855
856            try:
857                import graphviz
858            except ImportError:
859                # The fallback behavior if graphviz is not installed!
860                print('Python graphviz not found. Please invoke')
861                print('  $ pip install graphviz')
862                print('in order to enable automatic conversion to HTML.')
863                print()
864                print('You may also convert DOT to SVG manually via')
865                print('  $ dot -Tsvg input.dot -o output.svg')
866                print()
867                write_temp_file('.dot', self.output())
868                return
869
870            svg = graphviz.pipe('dot', 'svg', self.output())
871
872            filename = write_temp_file(
873                '.html', '<html><body bgcolor="%s">%s</body></html>' % (
874                             '#1a1a1a' if self._dark_mode else 'white', svg))
875            if sys.platform == 'win32':
876                os.startfile(filename)
877            elif sys.platform == 'darwin':
878                os.system('open "%s"' % filename)
879            else:
880                os.system('xdg-open "%s"' % filename)
881
882
883#===-----------------------------------------------------------------------===#
884# Explorers know how to traverse the ExplodedGraph in a certain order.
885# They would invoke a Visitor on every node or edge they encounter.
886#===-----------------------------------------------------------------------===#
887
888
889# BasicExplorer explores the whole graph in no particular order.
class BasicExplorer:
    """Explorer that visits every node of the graph exactly once."""

    def explore(self, graph, visitor):
        """Feed the whole *graph* to *visitor*.

        Nodes are visited in sorted order of their keys in ``graph.nodes``;
        right after each node, its outgoing edges are visited in sorted
        order of the successor keys. The traversal is bracketed by
        ``visit_begin_graph`` and ``visit_end_of_graph``.
        """
        visitor.visit_begin_graph(graph)
        for node_id in sorted(graph.nodes):
            logging.debug('Visiting ' + node_id)
            current = graph.nodes[node_id]
            visitor.visit_node(current)
            for succ_id in sorted(current.successors):
                logging.debug('Visiting edge: %s -> %s ' % (node_id, succ_id))
                visitor.visit_edge(current, graph.nodes[succ_id])
        visitor.visit_end_of_graph()
900
901
902#===-----------------------------------------------------------------------===#
903# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts.
904# Trimmers can be combined together by applying them sequentially.
905#===-----------------------------------------------------------------------===#
906
907
908# SinglePathTrimmer keeps only a single path - the leftmost path from the root.
909# Useful when the trimmed graph is still too large.
class SinglePathTrimmer:
    """Trimmer that reduces the graph to one path starting at its root."""

    def trim(self, graph):
        """Keep only the path that follows each node's first successor.

        Starting at ``graph.root_id``, every node on the path keeps only its
        first successor, and that successor's predecessor list is reset to
        the node it was reached from. The walk stops at a node without
        successors or when it reaches a node already on the path.
        ``graph.nodes`` is then replaced by a dict of the walked nodes.
        """
        kept_ids = set()
        current_id = graph.root_id
        # Reaching a node that is already on the path means we hit a cycle.
        while current_id not in kept_ids:
            kept_ids.add(current_id)
            current = graph.nodes[current_id]
            if len(current.successors) == 0:
                break
            next_id = current.successors[0]
            successor = graph.nodes[next_id]
            current.successors = [next_id]
            successor.predecessors = [current_id]
            current_id = next_id

        graph.nodes = {kept_id: graph.nodes[kept_id]
                       for kept_id in kept_ids}
929
930
931# TargetedTrimmer keeps paths that lead to specific nodes and discards all
932# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
933# a crash).
class TargetedTrimmer:
    """Trimmer that keeps only the nodes from which a target is reachable.

    The targets are keys of ``graph.nodes``; they are typically obtained
    from user input via ``parse_target_nodes``.
    """

    def __init__(self, target_nodes):
        """Remember *target_nodes*, an iterable of ``graph.nodes`` keys."""
        self._target_nodes = target_nodes

    @staticmethod
    def parse_target_node(node, graph):
        """Translate one user-supplied node reference into a graph key.

        *node* is either a pointer starting with ``0x`` (the key is then
        ``'Node' + node``) or a decimal number that is matched against the
        ``node_id`` attribute of every node in the graph.

        Raises ValueError when *node* is not a valid number or when no such
        node exists in *graph*.
        """
        if node.startswith('0x'):
            ret = 'Node' + node
            # Raise explicitly: an assert would be stripped under -O.
            if ret not in graph.nodes:
                raise ValueError(
                    'No node with pointer %s in the graph' % node)
            return ret

        stable_id = int(node)
        for other_id in graph.nodes:
            if graph.nodes[other_id].node_id == stable_id:
                return other_id
        # Failing here is clearer than returning None and crashing later
        # with "KeyError: None" while trimming.
        raise ValueError('No node with stable ID %s in the graph' % node)

    @staticmethod
    def parse_target_nodes(target_nodes, graph):
        """Parse a comma-separated string of node references.

        Returns the list of graph keys, in the order given. Raises
        ValueError if any reference cannot be resolved.
        """
        return [TargetedTrimmer.parse_target_node(node, graph)
                for node in target_nodes.split(',')]

    def trim(self, graph):
        """Drop every node that is neither a target nor its ancestor.

        Walks the predecessor links backwards from the targets, replaces
        ``graph.nodes`` with a dict of the reached nodes, and removes edges
        to dropped nodes from the survivors' successor and predecessor
        lists.
        """
        # Work on a copy so that the stored targets survive the traversal
        # and the trimmer can be applied again.
        queue = list(self._target_nodes)
        visited_nodes = set()

        while queue:
            node_id = queue.pop()
            if node_id in visited_nodes:
                # Was queued more than once before being processed.
                continue
            visited_nodes.add(node_id)
            for pred_id in graph.nodes[node_id].predecessors:
                if pred_id not in visited_nodes:
                    queue.append(pred_id)

        graph.nodes = {node_id: graph.nodes[node_id]
                       for node_id in visited_nodes}
        for node in graph.nodes.values():
            node.successors = [succ_id for succ_id in node.successors
                               if succ_id in visited_nodes]
            node.predecessors = [pred_id for pred_id in node.predecessors
                                 if pred_id in visited_nodes]
974
975
976#===-----------------------------------------------------------------------===#
977# The entry point to the script.
978#===-----------------------------------------------------------------------===#
979
980
def main():
    """Parse the command line, load the graph dump, trim it and render it.

    The input file is fed line by line (stripped of surrounding whitespace)
    into an ExplodedGraph. The trimmers selected by --to and --single-path
    are then applied, in that order, and the result is walked by a
    BasicExplorer driving a DotDumpVisitor.
    """
    parser = argparse.ArgumentParser(
        description='Display and manipulate Exploded Graph dumps.')
    parser.add_argument('filename', type=str,
                        help='the .dot file produced by the Static Analyzer')
    parser.add_argument('-v', '--verbose', dest='loglevel',
                        action='store_const', const=logging.DEBUG,
                        default=logging.WARNING,
                        help='enable info prints')
    parser.add_argument('-d', '--diff', dest='diff', action='store_true',
                        help='display differences between states')
    parser.add_argument('-t', '--topology', dest='topology',
                        action='store_true',
                        help='only display program points, omit states')
    parser.add_argument('-s', '--single-path', dest='single_path',
                        action='store_true',
                        help='only display the leftmost path in the graph '
                             '(useful for trimmed graphs that still '
                             'branch too much)')
    parser.add_argument('--to', type=str, default=None,
                        help='only display execution paths from the root '
                             'to the given comma-separated list of nodes '
                             'identified by a pointer or a stable ID; '
                             'compatible with --single-path')
    parser.add_argument('--dark', dest='dark', action='store_true',
                        help='dark mode')
    parser.add_argument('--gray', dest='gray', action='store_true',
                        help='black-and-white mode')
    parser.add_argument('--dump-dot-only', dest='dump_dot_only',
                        action='store_true',
                        help='instead of writing an HTML file and immediately '
                             'displaying it, dump the rewritten dot file '
                             'to stdout')
    options = parser.parse_args()
    logging.basicConfig(level=options.loglevel)

    graph = ExplodedGraph()
    with open(options.filename) as dot_file:
        for line in dot_file:
            graph.add_raw_line(line.strip())

    # Targets are resolved against the complete, untrimmed graph.
    trimmers = []
    if options.to is not None:
        targets = TargetedTrimmer.parse_target_nodes(options.to, graph)
        trimmers.append(TargetedTrimmer(targets))
    if options.single_path:
        trimmers.append(SinglePathTrimmer())

    explorer = BasicExplorer()
    visitor = DotDumpVisitor(options.diff, options.dark, options.gray,
                             options.topology, options.dump_dot_only)

    for trimmer in trimmers:
        trimmer.trim(graph)

    explorer.explore(graph, visitor)


if __name__ == '__main__':
    main()
1046