• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Helps compare openjdk_java_files contents against upstream file contents.
18
19Outputs a tab-separated table comparing each openjdk_java_files entry
20against OpenJDK upstreams. This can help verify updates to later upstreams
21or focus attention towards files that may have been missed in a previous
22update (http://b/36461944) or are otherwise surprising (http://b/36429512).
23
24 - Identifies each file as identical to, different from or missing from
25   each upstream; diffs are not produced.
26 - Optionally, copies all openjdk_java_files from the default upstream
27   (eg. OpenJDK8u121-b13) to a new directory, for easy directory comparison
28   using e.g. kdiff3, which allows inspecting detailed diffs.
29 - The ANDROID_BUILD_TOP environment variable must be set to point to the
30   AOSP root directory (parent of libcore).
31 - Run with -h command line argument to get usage instructions.
32
33To check out upstreams OpenJDK 7u40, 8u60 and 8u121-b13, run:
34
35mkdir openjdk
36cd openjdk
37hg clone http://hg.openjdk.java.net/jdk7u/jdk7u40/ 7u40
38(cd !$ ; sh get_source.sh)
39hg clone http://hg.openjdk.java.net/jdk8u/jdk8u 8u121-b13
40(cd !$ ; hg update -r jdk8u121-b13 && sh get_source.sh)
41hg clone http://hg.openjdk.java.net/jdk8u/jdk8u60/ 8u60
42(cd !$ ; sh get_source.sh)
43
44The newly created openjdk directory is then a suitable argument for the
45--upstream_root parameter.
46"""
47
48import argparse
49import csv
50import filecmp
51import os
52import re
53import shutil
54import sys
55
def rel_paths_from_makefile(build_top):
    """Returns the list of relative paths to .java files parsed from openjdk_java_files.mk

    Args:
        build_top: path of the AOSP root directory (the parent of libcore).

    Returns:
        A list with one entry per matching makefile line, in file order. Each
        entry is the path below ojluni/src/main/java, normalized to the
        platform's path separator.
    """
    list_file = os.path.join(build_top, "libcore", "openjdk_java_files.mk")

    # An entry is an indented ojluni/src/main/java/....java path followed by
    # either a line-continuation backslash or the end of the line (the final
    # entry of a makefile list carries no continuation). The pattern is a raw
    # string so that \s and \\ reach the regex engine unmodified.
    entry_re = re.compile(r"\s+ojluni/src/main/java/(.+\.java)\s*(?:\\|$)")

    with open(list_file, "r") as f:
        matches = (entry_re.match(line) for line in f)
        # normpath converts / to the appropriate separator (e.g. \ on
        # Windows), just in case.
        return [os.path.normpath(m.group(1)) for m in matches if m]
70
def ojluni_path(build_top, rel_path):
    """Returns the full path of rel_path inside libcore's ojluni source tree."""
    components = (build_top, "libcore", "ojluni", "src", "main", "java", rel_path)
    return os.path.join(*components)
74
def upstream_path(upstream_root, upstream, rel_path):
    """Locates rel_path inside the given upstream checkout.

    The known source directories are probed in order and the first path that
    exists on disk is returned; None is returned if none of them exists.
    """
    candidates = (
        os.path.join(upstream_root, upstream, os.path.normpath(source_dir), rel_path)
        for source_dir in ("jdk/src/share/classes", "jdk/src/solaris/classes")
    )
    return next((path for path in candidates if os.path.exists(path)), None)
87
88
# For files with N and M lines, respectively, this runs in time
# O(N+M) if the files are identical. Otherwise the lines that the two
# files share at their start and end are skipped in linear time and
# the remaining N' and M' lines are compared in O(N'*M'). This could
# be improved to O(D*(N'+M')) for files with at most D lines
# difference by only considering array elements within D cells
# from the diagonal.
def edit_distance_lines(file_a, file_b):
    """
    Computes the line-based edit distance between two files, i.e.
    the smallest number of line deletions, additions or replacements
    that would transform the content of one file into that of the other.

    Lines are compared as raw bytes (split at newline bytes), which is
    consistent with the byte-wise identity check and does not fail on
    files that are not valid text in the default encoding.

    Args:
        file_a: path of the first file.
        file_b: path of the second file.

    Returns:
        The edit distance as a non-negative int; 0 if the files are identical.
    """
    if filecmp.cmp(file_a, file_b, shallow=False):
        return 0  # files identical
    with open(file_a, "rb") as f:
        lines_a = f.readlines()
    with open(file_b, "rb") as f:
        lines_b = f.readlines()

    # Lines shared at the very start or end never need to be edited, so
    # dropping them leaves the distance unchanged while shrinking the
    # quadratic part of the work to the region that actually differs.
    shortest = min(len(lines_a), len(lines_b))
    prefix = 0
    while prefix < shortest and lines_a[prefix] == lines_b[prefix]:
        prefix += 1
    suffix = 0
    while (suffix < shortest - prefix
           and lines_a[-1 - suffix] == lines_b[-1 - suffix]):
        suffix += 1
    lines_a = lines_a[prefix:len(lines_a) - suffix]
    lines_b = lines_b[prefix:len(lines_b) - suffix]

    # prev_cost[i] starts out as the distance between no lines of a and
    # lines_b[:i], i.e. i additions.
    prev_cost = list(range(len(lines_b) + 1))
    for end_a, line_a in enumerate(lines_a, 1):
        # For each valid index i, prev_cost[i] is the edit distance between
        # lines_a[:end_a-1] and lines_b[:i].
        # We now calculate cur_cost[end_b] as the edit distance between
        # lines_a[:end_a] and lines_b[:end_b]
        cur_cost = [end_a]
        for end_b, line_b in enumerate(lines_b, 1):
            cur_cost.append(min(
                cur_cost[-1] + 1,  # append line from b
                prev_cost[end_b] + 1,  # append line from a
                # match or replace line
                prev_cost[end_b - 1] + (0 if line_a == line_b else 1)
            ))
        prev_cost = cur_cost
    return prev_cost[-1]
123
def compare_to_upstreams_and_save(out_file, build_top, upstream_root, upstreams, rel_paths, best_only=False):
    """
    Prints tab-separated values comparing ojluni files vs. each
    upstream, for each of the rel_paths, suitable for human
    analysis in a spreadsheet.
    This includes whether the corresponding upstream file is
    missing, identical, or by how many lines it differs, and
    a guess as to the correct upstream based on minimal line
    difference (ties broken in favor of upstreams that occur
    earlier in the list).

    Args:
        out_file: writable file-like object that receives the table.
        build_top: path of the AOSP root directory (the parent of libcore).
        upstream_root: directory holding one subdirectory per upstream.
        upstreams: sequence of upstream subdirectory names, in order of
            preference.
        rel_paths: iterable of file paths relative to the java source root.
        best_only: accepted for interface compatibility; it is not
            consulted when producing the table.
    """
    upstreams = list(upstreams)
    writer = csv.writer(out_file, delimiter='\t')
    writer.writerow(["rel_path", "guessed_upstream"] + upstreams)
    for rel_path in rel_paths:
        ojluni_file = ojluni_path(build_top, rel_path)
        upstream_comparisons = []
        # Larger than any real edit distance. sys.maxint is not used because
        # it does not exist on Python 3.
        best_distance = float("inf")
        # Stays empty when the file is missing from every upstream.
        guessed_upstream = ""
        for upstream in upstreams:
            upstream_file = upstream_path(upstream_root, upstream, rel_path)
            if upstream_file is None:
                upstream_comparison = "missing"
            else:
                edit_distance = edit_distance_lines(upstream_file, ojluni_file)
                if edit_distance == 0:
                    upstream_comparison = "identical"
                else:
                    upstream_comparison = "different (%d lines)" % (edit_distance)
                # Strict comparison keeps the earliest upstream on ties.
                if edit_distance < best_distance:
                    best_distance = edit_distance
                    guessed_upstream = upstream
            upstream_comparisons.append(upstream_comparison)
        writer.writerow([rel_path, guessed_upstream] + upstream_comparisons)
157
def copy_files(rel_paths, upstream_root, upstream, output_dir):
    """Mirrors the given upstream's copies of rel_paths below output_dir.

    Files that the upstream does not contain are skipped; destination
    directories are created as needed.
    """
    for rel_path in rel_paths:
        source = upstream_path(upstream_root, upstream, rel_path)
        if source is None:
            # This upstream has no such file, so there is nothing to copy.
            continue
        destination = os.path.join(output_dir, rel_path)
        parent_dir = os.path.dirname(destination)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        shutil.copyfile(source, destination)
168
def main():
    """Command line entry point.

    Parses the arguments, checks that the requested upstream directories
    exist, prints the comparison table to stdout and, if --output_dir was
    given, copies the default (first listed) upstream's files there.

    Raises:
        SystemExit: on invalid arguments, including when neither --build_top
            nor the ANDROID_BUILD_TOP environment variable is provided.
        Exception: if the output directory already exists or an upstream
            directory is missing.
    """
    parser = argparse.ArgumentParser(
        description="Check openjdk_java_files contents against upstream file contents.")
    parser.add_argument(
        "--upstream_root",
        help="Path below where upstream sources are checked out. This should be a "
             "directory with one child directory for each upstream (select the "
             "upstreams to compare against via --upstreams).",
        required=True)
    parser.add_argument(
        "--upstreams",
        default="8u121-b13,8u60,7u40",
        help="Comma separated list of subdirectory names of --upstream_root that "
             "each hold one upstream.")
    parser.add_argument(
        "--output_dir",
        help="(optional) path where default upstream sources should be copied to; "
             "this path must not yet exist and will be created. "
             "The default upstream is the one that occurs first in --upstreams.")
    parser.add_argument(
        "--build_top",
        default=os.environ.get('ANDROID_BUILD_TOP'),
        help="Path where Android sources are checked out (defaults to $ANDROID_BUILD_TOP).")
    args = parser.parse_args()
    # Without this check a missing build top only surfaces later as an
    # opaque TypeError when paths are joined.
    if args.build_top is None:
        parser.error("--build_top was not given and ANDROID_BUILD_TOP is not set")
    if args.output_dir is not None and os.path.exists(args.output_dir):
        raise Exception("Output dir already exists: " + args.output_dir)

    upstreams = [upstream.strip() for upstream in args.upstreams.split(',')]
    default_upstream = upstreams[0]
    for upstream in upstreams:
        # Named upstream_dir so that it does not shadow the module-level
        # upstream_path() function.
        upstream_dir = os.path.join(args.upstream_root, upstream)
        if not os.path.exists(upstream_dir):
            raise Exception("Upstream not found: " + upstream_dir)

    rel_paths = rel_paths_from_makefile(args.build_top)

    compare_to_upstreams_and_save(
        sys.stdout, args.build_top, args.upstream_root, upstreams, rel_paths)

    if args.output_dir is not None:
        copy_files(rel_paths, args.upstream_root, default_upstream, args.output_dir)
206
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
209