#!/usr/bin/python3 -B

# Copyright 2021 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Read the EXPECTED_UPSTREAM and update the files from the upstream."""
import argparse
import logging
# pylint: disable=g-importing-member
from pathlib import Path
import sys
from typing import List
from typing import Sequence

# pylint: disable=g-multiple-import
from common_util import (
    ExpectedUpstreamEntry,
    ExpectedUpstreamFile,
    has_file_in_tree,
    LIBCORE_DIR,
)

from git import (
    Blob,
    IndexFile,
    Repo,
)

# Enable INFO logging for error emitted by GitPython
logging.basicConfig(level=logging.INFO)

# Pick an arbitrary existing commit with an empty tree
EMPTY_COMMIT_SHA = "d85bc16ba1cdcc20bec6fcbfe46dc90f9fcd2f78"


def validate_and_remove_updated_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo) -> List[ExpectedUpstreamEntry]:
  """Returns a list of entries of which the file content needs to be updated.

  Each entry is validated by resolving its git_ref and src_path in the git
  database. An entry is kept when its dst_path is missing from the HEAD tree,
  or when the content at dst_path in HEAD differs from the upstream content.

  Args:
    entries: the entries to validate
    repo: the repository object

  Returns:
    The subset of entries, in the original order, that are out of date.

  Raises:
    Exception: any error raised while resolving an entry is re-raised after
      the offending entry is printed to stderr.
  """
  head_tree = repo.head.commit.tree
  result: List[ExpectedUpstreamEntry] = []

  for e in entries:
    try:
      # The following steps validate each entry by querying the git database
      commit = repo.commit(e.git_ref)
      source_blob = commit.tree.join(e.src_path)
      if not has_file_in_tree(e.dst_path, head_tree):
        # Add the entry if the file is missing in the HEAD
        result.append(e)
        continue

      dst_blob = head_tree.join(e.dst_path)
      # Add the entry if the content is different.
      # data_stream will be closed during GC.
      if source_blob.data_stream.read() != dst_blob.data_stream.read():
        result.append(e)
    except Exception:
      # Only report real errors; KeyboardInterrupt / SystemExit propagate
      # without being mislabelled as a failure of this entry.
      print(f"ERROR: reading entry: {e}", file=sys.stderr)
      raise

  return result


def partition_entries_by_ref(
    entries: List[ExpectedUpstreamEntry]) -> List[List[ExpectedUpstreamEntry]]:
  """Groups the entries by their git_ref.

  Args:
    entries: the entries to group

  Returns:
    A list of groups, one per distinct git_ref. Groups appear in the order
    their git_ref is first seen, and entries keep their relative order.
  """
  result_map = {}
  for e in entries:
    result_map.setdefault(e.git_ref, []).append(e)

  return list(result_map.values())


# Path of this script relative to LIBCORE_DIR, recorded in commit messages.
THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)
# Templates filled in with str.format(summary=..., ref=..., files=...).
MSG_FIRST_COMMIT = ("Import {summary} from {ref}\n"
                    "\n"
                    "List of files:\n"
                    " {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}"
                    "\n"
                    "Test: N/A")

MSG_SECOND_COMMIT = ("Merge {summary} from {ref} into the "
                     "expected_upstream branch\n"
                     "\n"
                     "List of files:\n"
                     " {files}\n"
                     "\n"
                     f"Generated by {THIS_TOOL_PATH}"
                     "\n"
                     "Test: N/A")


def merge_files_and_create_commit(entry_set: List[ExpectedUpstreamEntry],
                                  repo: Repo, checkout_only: bool) -> None:
  r"""Create the commits importing the given files into the current branch.

  `--------<ref>---------------   aosp/upstream_openjdkXXX
             \
        <first_commit>
               \
  -------<second_commit>------   expected_upstream

  This function creates the 2 commits, i.e. first_commit and second_commit, in
  the diagram. The goal is to checkout a subset of files specified in the
  entry_set, and merge them into the expected_upstream branch in order to keep
  the git-blame history of the individual files. first_commit is needed in
  order to move the files specified in the entry_set.

  In the implementation, first_commit isn't really modified from the ref, but
  created from an empty tree, and all files in entry_set will be added into
  the first_commit, second_commit is a merged commit and modified from
  the parent in the expected_upstream branch, and any file contents in the
  first commit will override the file content in the second commit.

  You may reference the following git commands for understanding which should
  create the same commits, but the python implementation is cleaner, because
  it doesn't change the working tree or create a new branch.
  first_commit:
      git checkout -b temp_branch <entry.git_ref>
      rm -r * .jcheck/ .hgignore .hgtags # Remove hidden files
      git checkout <entry.git_ref> <entry.src_path>
      mkdir -p <entry.dst_path>.directory && git mv <entry.src_path>
      <entry.dst_path>
      git commit -a
  second_commit:
      git merge temp_branch
      git checkout HEAD -- ojluni/ # Force checkout to resolve merge conflict
      git checkout temp_branch -- <entry.dst_path>
      git commit

  Args:
    entry_set: a non-empty list of entries that all share the same git_ref;
      the ref of the first entry is used for the whole set
    repo: the repository object
    checkout_only: True if it creates no commit
  """
  ref = entry_set[0].git_ref
  upstream_commit = repo.commit(ref)

  dst_paths = [e.dst_path for e in entry_set]
  str_dst_paths = "\n ".join(dst_paths)

  for entry in entry_set:
    src_blob = upstream_commit.tree[entry.src_path]
    # Write into the file system directly because GitPython provides no API
    # writing into the index in memory. IndexFile.move doesn't help here,
    # because the API requires the file on the working tree too.
    # However, it's fine, because we later reset the HEAD to the second commit.
    # The user expects the file showing in the file system, and the file is
    # not staged/untracked because the file is in the second commit too.
    absolute_dst_path = Path(LIBCORE_DIR, entry.dst_path)
    absolute_dst_path.parent.mkdir(parents=True, exist_ok=True)
    with absolute_dst_path.open("wb") as file:
      file.write(src_blob.data_stream.read())

  if not checkout_only:
    # We need an index empty initially, i.e. no staged files. Note that the
    # empty commit is not the parent. The parents can be set later.
    first_index = IndexFile.from_tree(repo, repo.commit(EMPTY_COMMIT_SHA))
    for entry in entry_set:
      first_index.add(entry.dst_path)

    summary_msg = "files"
    if len(entry_set) == 1:
      summary_msg = Path(entry_set[0].dst_path).stem
    msg = MSG_FIRST_COMMIT.format(
        summary=summary_msg, ref=ref, files=str_dst_paths)

    # head=False: the first commit must not move the current branch.
    first_commit = first_index.commit(
        message=msg, parent_commits=[upstream_commit], head=False)

    # The second commit is a merge commit. It doesn't use the current index,
    # i.e. repo.index, to avoid affecting the current staged files.
    prev_head = repo.active_branch.commit
    second_index = IndexFile.from_tree(repo, prev_head)

    def is_blob(obj, _depth):
      # Traversal predicate: keep files only, skip the sub-trees.
      return isinstance(obj, Blob)

    blobs = first_commit.tree.traverse(is_blob)
    second_index.add(blobs)
    msg = MSG_SECOND_COMMIT.format(
        summary=summary_msg, ref=ref, files=str_dst_paths)
    second_commit = second_index.commit(
        message=msg, parent_commits=[prev_head, first_commit], head=True)

    # We updated the HEAD to the second commit. Thus, git-reset updates the
    # current index. Otherwise, the current index, aka, repo.index, shows that
    # the files are deleted.
    repo.index.reset()

  if checkout_only:
    print(f"Checked out the following files from {ref}:")
  else:
    print(f"New merge commit {second_commit} contains:")
  print(f" {str_dst_paths}")


def create_commits(repo: Repo, checkout_only: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM.

  Does nothing except printing a message when the active branch does not
  track aosp/expected_upstream, or when every entry is already up to date.

  Args:
    repo: the repository object
    checkout_only: True if the files are only checked out and no commit is
      created
  """
  current_tracking_branch = repo.active_branch.tracking_branch()
  # tracking_branch() is None when the active branch has no upstream
  # configured; treat it the same as tracking the wrong branch instead of
  # failing with an AttributeError on `.name`.
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/expected_upstream"):
    print("This script should only run on aosp/expected_upstream branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}")
    return

  print("Reading EXPECTED_UPSTREAM file...")
  expected_upstream_entries = ExpectedUpstreamFile().read_all_entries()

  outdated_entries = validate_and_remove_updated_entries(
      expected_upstream_entries, repo)

  if not outdated_entries:
    print("No need to update. All files are updated.")
    return

  print("The following entries will be updated from upstream")
  for e in outdated_entries:
    print(f" {e.dst_path}")

  entry_sets_to_be_merged = partition_entries_by_ref(outdated_entries)

  # One pair of commits (or one checkout) per distinct upstream ref.
  for entry_set in entry_sets_to_be_merged:
    merge_files_and_create_commit(entry_set, repo, checkout_only)


def main(argv: Sequence[str]) -> None:
  """Parses the command line and updates the files from the upstream.

  Args:
    argv: the command line arguments, excluding the program name
  """
  arg_parser = argparse.ArgumentParser(
      description="Read the EXPECTED_UPSTREAM and update the files from the "
                  "OpenJDK. By default, it creates commits forking from "
                  "the upstream version in order to preserve the line history.")
  arg_parser.add_argument(
      "--checkout-only", action="store_true",
      help="Checkout the files, but creates no commits")

  args = arg_parser.parse_args(argv)

  checkout_only = args.checkout_only

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    create_commits(repo, checkout_only)
  finally:
    # Always release the resources held by the Repo object.
    repo.close()


if __name__ == "__main__":
  main(sys.argv[1:])