#!/usr/bin/env python3 # Copyright 2016 Google Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS-IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from concurrent import futures import itertools import sys import re import pygraphviz as gv import ply.lex as lex import ply.yacc as yacc from functools import lru_cache as memoize diagnostic_header_pattern = re.compile('[^ ]+\.[^ ]+:[0-9]+:[0-9]+: ([^ ]*): (.*)') in_file_included_from_pattern = re.compile('In file included from .*:') in_instantiation_of_template_pattern = re.compile('in instantiation of (.*) (?:requested|required) here') static_warning_marked_deprecated_here_pattern = re.compile('\'static_warning\' has been explicitly marked deprecated here') class Diagnostic: def __init__(self, kind, message): self.kind = kind self.message = message self.template_instantiation_trace = [] tokens = ( 'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', 'LBRACE', 'RBRACE', 'LESS_THAN', 'GREATER_THAN', 'DOUBLE_COLON', 'COMMA', 'IDENTIFIER', 'ASTERISK', 'AMPERSAND', ) t_LPAREN = r'\(' t_RPAREN = r'\)' t_LBRACKET = r'\[' t_RBRACKET = r'\]' t_LBRACE = r'}' t_RBRACE = r'{' t_LESS_THAN = r'<' t_GREATER_THAN = r'>' t_DOUBLE_COLON = r'::' t_COMMA = r',' t_ASTERISK = r'\*' t_AMPERSAND = r'&' # We conflate numbers as identifiers too, we don't care about the difference. t_IDENTIFIER = r'[a-zA-Z0-9_]+' t_ignore = ' \t' def t_error(t): raise Exception("Illegal character '%s' followed by %s" % (t.value[0], t.value[1:])) class LayoutNeedsMultipleLinesException(Exception): pass class AstNode: def __str__(self): return ''.join(self) class TerminalAstNode(AstNode): def __init__(self, s): self.s = s self.is_multiline = (s == '\n') # last_line_length is the string length if s is not a multiline string. # For multiline strings ending in a newline, this is 0. if self.is_multiline: self.first_line_length = 0 self.last_line_length = 0 self.max_line_length = 0 else: # This never happens ATM, so we don't handle it. assert not '\n' in s self.first_line_length = len(s) self.last_line_length = len(s) self.max_line_length = len(s) def __iter__(self): return iter((self.s,)) class NonTerminalAstNode(AstNode): def __init__(self, children_ast_nodes): self.children_ast_nodes = children_ast_nodes first_line_length = 0 last_line_length = 0 is_multiline = False max_line_length = 0 for node in children_ast_nodes: if node.is_multiline: last_line_length = node.last_line_length max_line_length = max(max_line_length, last_line_length + node.first_line_length, node.max_line_length) is_multiline = True else: last_line_length += node.last_line_length max_line_length = max(max_line_length, last_line_length) self.first_line_length = first_line_length self.last_line_length = last_line_length self.is_multiline = is_multiline self.max_line_length = max_line_length def __iter__(self): return itertools.chain(*self.children_ast_nodes) max_line_length = 80 # Size of an indent in spaces. single_indent_length = 4 class TerminalNodeFactory(): def __init__(self, s): self.s = s def __call__(self, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only): return TerminalAstNode(self.s) # 'balanced_string' nodes evaluate to a function (or a callable object) taking these parameters: # current_indent (integer): the indentation in the current line (spaces only) # current_line_length (integer): the number of preceding characters in the current line (>=current_indent) # inside_meta_type (boolean): whether we're inside a Type<...> # last_token_was_type_wrapper (boolean): whether the immediately-preceding token was the identifier 'Type' # and returning an AstNode # 'comma_separated_balanced_string' nodes evaluate to a tuple of such functions def p_comma_separated_balanced_string_empty(p): 'comma_separated_balanced_string : ' p[0] = tuple() def p_comma_separated_balanced_string_not_empty(p): 'comma_separated_balanced_string : COMMA balanced_string comma_separated_balanced_string' p[0] = ( p[2], *(p[3]) ) def p_optional_balanced_string_empty(p): 'optional_balanced_string : ' p[0] = TerminalNodeFactory('') def p_optional_balanced_string_not_empty(p): 'optional_balanced_string : balanced_string' p[0] = p[1] class BalancedStringTerminalNodeFactory(): def __init__(self, first_token, node_factory): self.first_token = first_token self.node_factory = node_factory def __call__(self, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only): terminal_node = TerminalAstNode(self.first_token) non_terminal_node = self.node_factory( current_indent, current_line_length + len(self.first_token), inside_meta_type, self.first_token == 'Type', accept_single_line_only) if non_terminal_node is None: return None return NonTerminalAstNode((terminal_node, non_terminal_node)) def p_balanced_string_terminal(p): '''balanced_string : DOUBLE_COLON balanced_string | IDENTIFIER optional_balanced_string | ASTERISK optional_balanced_string | AMPERSAND optional_balanced_string ''' first_token = p[1] node_factory = p[2] p[0] = BalancedStringTerminalNodeFactory(first_token, node_factory) def create_composite_node_from_factories(node_factory_inside_meta_type_pairs, current_line_length, accept_single_line_only): nodes = [] for node_factory, current_indent, inside_meta_type in node_factory_inside_meta_type_pairs: node = node_factory(current_indent, current_line_length, inside_meta_type, False, accept_single_line_only) if node is None: return None nodes.append(node) if node.is_multiline: if accept_single_line_only: raise Exception('Unexpected multiline, due to factory: ' + node_factory) # Note that due to the way we break lines, the last line will have the same indent as the first. # So we don't need to update current_indent here. current_line_length = node.last_line_length else: current_line_length += node.last_line_length return NonTerminalAstNode(nodes) def compute_layout(left_token, intermediate_node_factories, right_token, rhs_node_factory, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only): # We lay out the result in one of two ways: # # $previousIndent $previousContent LPAREN x1, x2, x3 RPAREN balanced_string # # Or: # # $previousIndent $previousContent LPAREN # $previousIndent $indent x1 , # $previousIndent $indent x2 , # $previousIndent $indent x3 RPAREN balanced_string entering_meta_type = last_token_was_type_wrapper # First, we try to use the first format if possible node_factory_inside_meta_type_pairs = [ (TerminalNodeFactory(left_token), current_indent, inside_meta_type), *((intermediate_node_factory, current_indent, (inside_meta_type or entering_meta_type)) for intermediate_node_factory in intermediate_node_factories), (TerminalNodeFactory(right_token), current_indent, inside_meta_type), (rhs_node_factory, current_indent, inside_meta_type), ] node_with_single_line_layout = create_composite_node_from_factories(node_factory_inside_meta_type_pairs, current_line_length, True) if node_with_single_line_layout is not None and node_with_single_line_layout.max_line_length <= max_line_length: assert not node_with_single_line_layout.is_multiline return node_with_single_line_layout if accept_single_line_only: return None # The result exceeds the line length, let's switch to the second one. node_factory_inside_meta_type_pairs = [ (TerminalNodeFactory(left_token), current_indent, inside_meta_type) ] new_indent_length = current_indent + single_indent_length comma_node_factory_inside_meta_type_pair = (TerminalNodeFactory(','), current_indent, inside_meta_type or entering_meta_type) newline_node_factory_inside_meta_type_pair = (TerminalNodeFactory('\n'), current_indent, inside_meta_type or entering_meta_type) indent_node_factory_inside_meta_type_pair = (TerminalNodeFactory(' ' * new_indent_length), current_indent, inside_meta_type or entering_meta_type) for inner_node_factory in intermediate_node_factories: node_factory_inside_meta_type_pairs.append(newline_node_factory_inside_meta_type_pair) node_factory_inside_meta_type_pairs.append(indent_node_factory_inside_meta_type_pair) node_factory_inside_meta_type_pairs.append((inner_node_factory, new_indent_length, inside_meta_type or entering_meta_type)) node_factory_inside_meta_type_pairs.append(comma_node_factory_inside_meta_type_pair) node_factory_inside_meta_type_pairs.pop() node_factory_inside_meta_type_pairs.append((TerminalNodeFactory(right_token), current_indent, inside_meta_type)) node_factory_inside_meta_type_pairs.append((rhs_node_factory, current_indent, inside_meta_type)) return create_composite_node_from_factories(node_factory_inside_meta_type_pairs, current_line_length, accept_single_line_only) def p_balanced_string_with_balanced_token_no_comma_separated_elems(p): '''balanced_string : LPAREN RPAREN optional_balanced_string | LBRACKET RBRACKET optional_balanced_string | LBRACE RBRACE optional_balanced_string | LESS_THAN GREATER_THAN optional_balanced_string ''' p_1 = p[1] p_2 = p[2] p_3 = p[3] def result(current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only): return compute_layout(p_1, [], p_2, p_3, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only) p[0] = result def p_balanced_string_with_balanced_token_some_comma_separated_elems(p): '''balanced_string : LPAREN balanced_string comma_separated_balanced_string RPAREN optional_balanced_string | LBRACKET balanced_string comma_separated_balanced_string RBRACKET optional_balanced_string | LBRACE balanced_string comma_separated_balanced_string RBRACE optional_balanced_string | LESS_THAN balanced_string comma_separated_balanced_string GREATER_THAN optional_balanced_string ''' p_1 = p[1] p_2 = p[2] p_3 = p[3] p_4 = p[4] p_5 = p[5] def result(current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only): if not inside_meta_type: if p_1 == '(' and p_4 == ')': if len(p_3) == 0: if isinstance(p_2, BalancedStringTerminalNodeFactory) and p_2.first_token == '*': if isinstance(p_2.node_factory, TerminalNodeFactory) and p_2.node_factory.s == '': # Special case: we're not inside a Type<...> and we've encountered a '(*)'. # Discard it and just print the rhs. return p_5(current_indent, current_line_length, inside_meta_type, False, accept_single_line_only) return compute_layout(p_1, (p_2, *(p_3)), p_4, p_5, current_indent, current_line_length, inside_meta_type, last_token_was_type_wrapper, accept_single_line_only) p[0] = result def p_error(p): raise Exception("Syntax error when parsing meta type: ", p[:]) lexer = lex.lex() parser = yacc.yacc(start='balanced_string') strings_to_remove = re.compile(r'template class |template type alias |function template specialization |member class |member function |default argument for |fruit::impl::meta::|fruit::impl::|fruit::') def do_simplify_template_trace_element(element): element, _ = re.subn(strings_to_remove, '', element) element = element.strip() if element[0] != '\'' or element[-1] != '\'': raise Exception('Expected single quotes in: ' + element) element = element[1:-1] if element.startswith('DoEval<') and element[-1] == '>': element = element[7:-1] result = ''.join(parser.parse(element, lexer)(0, 0, False, False, False)) return result @memoize(maxsize=1000) def simplify_template_trace_element(element, executor): return executor.submit(do_simplify_template_trace_element, element) def to_dot_left_justified_string(s): return '\\l'.join(s.splitlines() + ['']) def main(): diagnostics = [] with futures.ProcessPoolExecutor() as executor: lines = sys.stdin.readlines() for line_number, line in enumerate(lines): # Remove the newline line = line[:-1] matches = in_file_included_from_pattern.search(line) if matches: continue matches = diagnostic_header_pattern.search(line) if matches: diagnostic_kind, diagnostic_message = matches.groups() if diagnostic_kind == 'error': diagnostics.append(Diagnostic(diagnostic_kind, diagnostic_message)) print('Processing diagnostic. (%s / %s) ' % (line_number, len(lines)), file=sys.stderr) elif diagnostic_kind == 'note': matches = in_instantiation_of_template_pattern.search(diagnostic_message) if matches: if not diagnostics: raise Exception('Found template instantiation note before any error diagnostic: %s' % diagnostic_message) if 'in instantiation of template type alias' in line: pass else: group = matches.groups()[0] trace_element_future = simplify_template_trace_element(group, executor) diagnostics[-1].template_instantiation_trace.append(trace_element_future) continue matches = static_warning_marked_deprecated_here_pattern.search(diagnostic_message) if matches: continue raise Exception('Found unknown note: %s' % diagnostic_message) call_graph = {} graph = gv.AGraph(directed=True) for diagnostic_index, diagnostic in enumerate(diagnostics): if diagnostic_index % 10 == 0: print('Constructing dep graph: iteration %s/%s' % (diagnostic_index, len(diagnostics)), file=sys.stderr) template_instantiation_trace = [trace_element_future.result() for trace_element_future in diagnostic.template_instantiation_trace] for called, caller in zip(template_instantiation_trace[1:], template_instantiation_trace[2:]): if called in call_graph and call_graph[called] != caller: # Avoid this edge, so that the resulting graph is a tree continue graph.add_edge(to_dot_left_justified_string(caller), to_dot_left_justified_string(called)) call_graph[called] = caller print(graph) if __name__ == '__main__': main()