#!/usr/bin/env python3
##
## Copyright (c) 2016, Alliance for Open Media. All rights reserved
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
"""Classes for representing diff pieces."""

__author__ = "jkoleszar@google.com"

import re


class DiffLines(object):
    """A container for one half of a diff.

    Holds the lines belonging to one side (old or new) of a single hunk,
    together with the line numbers of the lines that differ.
    """

    def __init__(self, filename, offset, length):
        """Creates an empty container.

        Args:
            filename: Name of the file this side of the hunk refers to.
            offset: Line number of the first line of this side of the hunk.
            length: Number of lines this side is expected to hold.
        """
        self.filename = filename
        self.offset = offset
        self.length = length
        # Line contents with the leading diff marker character removed.
        self.lines = []
        # Line numbers (offset-based) of lines whose marker is not " ".
        self.delta_line_nums = []

    def Append(self, line):
        """Adds one raw diff line (including its leading marker character).

        Raises:
            AssertionError: if more than `length` lines have been appended.
        """
        index = len(self.lines)
        if line[0] != " ":
            # Anything other than a context line counts as a changed line.
            self.delta_line_nums.append(self.offset + index)
        self.lines.append(line[1:])
        assert index + 1 <= self.length

    def Complete(self):
        """Returns True once exactly `length` lines have been appended."""
        return len(self.lines) == self.length

    def __contains__(self, item):
        """Returns True if line number `item` lies within this range."""
        return self.offset <= item <= self.offset + self.length - 1


class DiffHunk(object):
    """A container for one diff hunk, consisting of two DiffLines."""

    def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
        """Creates an empty hunk.

        Args:
            header: Header lines associated with this hunk, stored as given.
            file_a: File name for the left (old) side.
            file_b: File name for the right (new) side.
            start_a: First line number of the left side.
            len_a: Number of lines on the left side.
            start_b: First line number of the right side.
            len_b: Number of lines on the right side.
        """
        self.header = header
        self.left = DiffLines(file_a, start_a, len_a)
        self.right = DiffLines(file_b, start_b, len_b)
        # Every raw line appended to this hunk, in order.
        self.lines = []

    def Append(self, line):
        """Adds a line to the DiffHunk and its DiffLines children.

        Raises:
            AssertionError: if the line starts with an unrecognized marker.
        """
        marker = line[0]
        if marker == "-":
            self.left.Append(line)
        elif marker == "+":
            self.right.Append(line)
        elif marker == " ":
            # Context lines belong to both sides.
            self.left.Append(line)
            self.right.Append(line)
        elif marker == "\\":
            # Ignore newline messages from git diff.
            pass
        else:
            assert False, ("Unrecognized character at start of diff line "
                           "%r" % line[0])
        self.lines.append(line)

    def Complete(self):
        """Returns True when both sides have received all their lines."""
        return self.left.Complete() and self.right.Complete()

    def __repr__(self):
        return "DiffHunk(%s, %s, len %d)" % (
            self.left.filename, self.right.filename,
            max(self.left.length, self.right.length))


def ParseDiffHunks(stream):
    """Walk a file-like object, yielding DiffHunks as they're parsed.

    Args:
        stream: Object with a readline() method returning text lines of a
            unified diff; an empty return value marks end of input.

    Yields:
        A DiffHunk for every hunk, as soon as both of its sides are complete.

    Raises:
        AssertionError: if a hunk header appears before both file name lines,
            if a hunk contains an unrecognized line, or if the input ends in
            the middle of a hunk.
    """
    file_regex = re.compile(r"(\+\+\+|---) (\S+)")
    range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")

    # File names and header lines of the file currently being parsed; they
    # stay None until the corresponding "---" / "+++" line has been seen.
    a = b = None
    a_line = b_line = None

    hunk = None
    while True:
        line = stream.readline()
        if not line:
            break

        if hunk is None:
            # Parse file names
            diff_file = file_regex.match(line)
            if diff_file:
                if line.startswith("---"):
                    a_line = line
                    a = diff_file.group(2)
                    continue
                if line.startswith("+++"):
                    b_line = line
                    b = diff_file.group(2)
                    continue

            # Parse offset/lengths
            diffrange = range_regex.match(line)
            if diffrange:
                assert a_line is not None and b_line is not None, (
                    "Hunk header %r found before file names" % line)

                # In the unified format "@@ -l,s +l,s @@" the count "s" may
                # be omitted, in which case it is 1; the number that is
                # present is always the starting line.
                start_a = int(diffrange.group(1))
                len_a = 1 if diffrange.group(3) is None else int(
                    diffrange.group(3))
                start_b = int(diffrange.group(4))
                len_b = 1 if diffrange.group(6) is None else int(
                    diffrange.group(6))

                header = [a_line, b_line, line]
                hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
        else:
            # Add the current line to the hunk
            hunk.Append(line)

        # See if the whole hunk has been parsed. If so, yield it and prepare
        # for the next hunk.
        if hunk is not None and hunk.Complete():
            yield hunk
            hunk = None

    # Partial hunks are a parse error
    assert hunk is None