• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2009 Google Inc. All rights reserved.
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are
5# met:
6#
7#    * Redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer.
9#    * Redistributions in binary form must reproduce the above
10# copyright notice, this list of conditions and the following disclaimer
11# in the documentation and/or other materials provided with the
12# distribution.
13#    * Neither the name of Google Inc. nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29"""WebKit's Python module for interacting with patches."""
30
31import logging
32import re
33
34
# Maps a pattern to its compiled regular expression object so that each
# distinct pattern is compiled only once by match().
_regexp_compile_cache = {}


def match(pattern, string):
    """Matches the string with the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression to apply. It is also used as the
               cache key, so it must be hashable.
      string: The text to match; matching is anchored at its start
              (this uses the compiled pattern's match(), not search()).

    Returns:
      The match object on success, None otherwise.
    """
    compiled = _regexp_compile_cache.get(pattern)
    if compiled is None:
        # First use of this pattern: compile it and remember the result.
        compiled = _regexp_compile_cache[pattern] = re.compile(pattern)
    return compiled.match(string)
43
44
# (compiled pattern, conversion) pairs tried in order by
# git_diff_to_svn_diff. They are built once at import time instead of on
# every call, and the patterns are raw strings so that "\." and "\+" are
# regexp escapes rather than (invalid) string escapes.
_GIT_TO_SVN_CONVERSIONS = (
    (re.compile(r"^diff --git a/(.+) b/(?P<FilePath>.+)"),
     lambda matched: "Index: " + matched.group('FilePath') + "\n"),
    (re.compile(r"^new file.*"),
     lambda matched: "\n"),
    # Abbreviated object names are 7 hex digits at minimum here, but git
    # may emit longer ones (up to the full 40 digits), so accept 7 to 40.
    (re.compile(r"^index [0-9a-f]{7,40}\.\.[0-9a-f]{7,40} [0-9]{6}"),
     lambda matched: "===================================================================\n"),
    (re.compile(r"^--- a/(?P<FilePath>.+)"),
     lambda matched: "--- " + matched.group('FilePath') + "\n"),
    (re.compile(r"^\+\+\+ b/(?P<FilePath>.+)"),
     lambda matched: "+++ " + matched.group('FilePath') + "\n"),
)


def git_diff_to_svn_diff(line):
    """Converts a git formatted diff line to a svn formatted line.

    Args:
      line: A string representing a line of the diff.

    Returns:
      The svn formatted replacement (terminated by a newline) when the
      line is one of the recognized git header lines; otherwise the line
      itself, unchanged.
    """
    for pattern, conversion in _GIT_TO_SVN_CONVERSIONS:
        matched = pattern.match(line)
        if matched:
            # Only the first matching pattern is applied.
            return conversion(matched)
    return line
62
63
def get_diff_converter(first_diff_line):
    """Gets a converter function of diff lines.

    Args:
      first_diff_line: The first filename line of a diff file.
                       If this line is git formatted, we'll return a
                       converter from git to SVN.

    Returns:
      git_diff_to_svn_diff when the line starts with "diff --git a/";
      otherwise a function that returns its argument unchanged.
    """
    # A fixed-prefix test needs no regular expression.
    if first_diff_line.startswith("diff --git a/"):
        return git_diff_to_svn_diff
    return lambda line: line
75
76
# States of the line-by-line state machine in DiffParser.__init__.
# No "Index: <path>" file declaration has been seen yet.
_INITIAL_STATE = 1
# A file was declared; no "@@ ... @@" chunk header has followed it yet.
_DECLARED_FILE_PATH = 2
# A chunk header was seen; following lines are recorded on the current file.
_PROCESSING_CHUNK = 3
80
81
class DiffFile:
    """Holds the parsed lines of a single file within a patch.

    Every entry of the "lines" list is a 3-tuple:
       (deleted_line_number, new_line_number, line_string)
    A zero deleted_line_number marks a line that was newly added.
    A zero new_line_number marks a line that was deleted.
    """

    def __init__(self, filename):
        """Starts an empty record for the file called filename."""
        self.lines = []
        self.filename = filename

    def add_unchanged_line(self, deleted_line_number, new_line_number, line):
        """Records a line that carries a number on both sides of the diff."""
        entry = (deleted_line_number, new_line_number, line)
        self.lines.append(entry)

    def add_deleted_line(self, line_number, line):
        """Records a deleted line; its new line number is stored as zero."""
        entry = (line_number, 0, line)
        self.lines.append(entry)

    def add_new_line(self, line_number, line):
        """Records an added line; its deleted line number is stored as zero."""
        entry = (0, line_number, line)
        self.lines.append(entry)
103
104
class DiffParser:
    """A parser for a patch file.

    The field "files" is a dict whose key is the filename and value is
    a DiffFile object.
    """

    def __init__(self, diff_input):
        """Parses a diff.

        Problems found in the input are reported through logging.error;
        the offending line is skipped and parsing goes on.

        Args:
          diff_input: An iterable object yielding the lines of the diff
                      as strings. A trailing newline on each line is
                      stripped before the line is examined.
        """
        state = _INITIAL_STATE

        self.files = {}
        current_file = None
        old_diff_line = None
        new_diff_line = None
        for line in diff_input:
            line = line.rstrip("\n")
            if state == _INITIAL_STATE:
                # Until a file has been declared, keep choosing the
                # converter from the line at hand, so that lines ahead of
                # the first file header do not fix the format.
                transform_line = get_diff_converter(line)
            line = transform_line(line)

            file_declaration = match(r"^Index: (?P<FilePath>.+)", line)
            if file_declaration:
                filename = file_declaration.group('FilePath')
                current_file = DiffFile(filename)
                self.files[filename] = current_file
                state = _DECLARED_FILE_PATH
                continue

            lines_changed = match(r"^@@ -(?P<OldStartLine>\d+)(,\d+)? \+(?P<NewStartLine>\d+)(,\d+)? @@", line)
            if lines_changed:
                if state == _INITIAL_STATE:
                    # No file has been declared, so there is no DiffFile
                    # to record this chunk on. Report it and stay in the
                    # initial state instead of entering the chunk state,
                    # where the next line would be added to a None file.
                    logging.error('Unexpected line change without file path declaration: %r', line)
                    continue
                old_diff_line = int(lines_changed.group('OldStartLine'))
                new_diff_line = int(lines_changed.group('NewStartLine'))
                state = _PROCESSING_CHUNK
                continue

            if state == _PROCESSING_CHUNK:
                if line.startswith('+'):
                    current_file.add_new_line(new_diff_line, line[1:])
                    new_diff_line += 1
                elif line.startswith('-'):
                    current_file.add_deleted_line(old_diff_line, line[1:])
                    old_diff_line += 1
                elif line.startswith(' '):
                    current_file.add_unchanged_line(old_diff_line, new_diff_line, line[1:])
                    old_diff_line += 1
                    new_diff_line += 1
                elif line == '\\ No newline at end of file':
                    # Nothing to do.  We may still have some added lines.
                    pass
                else:
                    logging.error('Unexpected diff format when parsing a chunk: %r', line)
163