#!/usr/bin/python3 -B

# Copyright 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Read the EXPECTED_UPSTREAM and merge the files from the upstream."""
import argparse
import datetime
import logging
# pylint: disable=g-importing-member
import os.path
from pathlib import Path
import random
import re
import string
import sys
from typing import List
from typing import Optional
from typing import Sequence

# pylint: disable=g-multiple-import
from common_util import (
    ExpectedUpstreamEntry,
    ExpectedUpstreamFile,
    has_file_in_tree,
    LIBCORE_DIR,
    OjluniFinder,
)

from git import (
    Commit,
    DiffIndex,
    GitCommandError,
    Head,
    IndexFile,
    Repo,
)

# Enable INFO logging for error emitted by GitPython
logging.basicConfig(level=logging.INFO)


def validate_and_remove_updated_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo, commit: Commit) -> List[ExpectedUpstreamEntry]:
  """Returns a list of entries of which the file content needs to be updated.

  Each entry is validated by resolving its git ref and source path in the git
  database. An entry is kept when its destination file is missing from the
  tree of `commit`, or when the destination content differs from the source.

  Args:
    entries: the entries to check
    repo: the repository object
    commit: the commit whose tree holds the current destination files

  Returns:
    the subset of `entries` whose destination file is missing or different

  Raises:
    Exception: whatever the git lookup raises for an invalid entry; the
      offending entry is printed to stderr before the error is re-raised.
  """
  commit_tree = commit.tree
  result: List[ExpectedUpstreamEntry] = []

  for e in entries:
    try:
      # The following steps validate each entry by querying the git database.
      # A dedicated local name is used so the `commit` argument isn't shadowed.
      upstream_commit = repo.commit(e.git_ref)
      source_blob = upstream_commit.tree.join(e.src_path)
      if not has_file_in_tree(e.dst_path, commit_tree):
        # Add the entry if the file is missing in the given commit
        result.append(e)
        continue

      dst_blob = commit_tree.join(e.dst_path)
      # Add the entry if the content is different.
      # data_stream will be closed during GC.
      if source_blob.data_stream.read() != dst_blob.data_stream.read():
        result.append(e)
    except Exception:
      # Only real errors are reported here; KeyboardInterrupt / SystemExit
      # propagate without being mislabelled as an entry-reading failure.
      print(f"ERROR: reading entry: {e}", file=sys.stderr)
      raise

  return result


# Path of this script relative to LIBCORE_DIR; embedded in commit messages.
THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)

# Name prefix of the temporary branches created by this script.
TEMP_EXPECTED_BRANCH_PREFIX = "expected_upstream_"

# Message template of the commit created on the temporary branch.
# Only the THIS_TOOL_PATH fragment is an f-string; the other placeholders are
# filled in later by str.format().
MSG_FIRST_COMMIT = ("Import {summary}\n"
                    "\n"
                    "List of files:\n"
                    " {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}\n"
                    "\n"
                    "{bug}\n"
                    "Test: N/A"
                    "{change_id_str}")

# Message template of the merge commit.
MSG_SECOND_COMMIT = ("Merge {summary} into the "
                     "aosp/master branch\n"
                     "\n"
                     "List of files:\n"
                     " {files}\n"
                     "\n"
                     "{bug}\n"
                     "Test: N/A"
                     "{change_id_str}")


def create_commit_staging_diff(repo: Repo) -> None:
  r"""Save the current EXPECTED_UPSTREAM file in a new git commit.

  It can be retrieved later if this script fails.

  Args:
    repo: the repository object
  """
  head = repo.head
  index = IndexFile.from_tree(repo, head.commit)
  index.add("EXPECTED_UPSTREAM")

  now_str = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
  msg = f"Staging EXPECTED_UPSTREAM at {now_str}"
  # head=False: the commit object is created without moving HEAD.
  commit = index.commit(message=msg, parent_commits=[head.commit], head=False)

  print(
      f"The current EXPECTED_UPSTREAM file is saved in {commit.hexsha}.\n"
      "If this script fails in the later stage, please retrieve the file by:\n"
      f" git checkout {commit.hexsha} -- EXPECTED_UPSTREAM")


def create_commit_summary(diff_entries: List[ExpectedUpstreamEntry]) -> str:
  r"""Create a commit summary message.

  Non-test classes are preferred for the summary; test classes are only used
  when no non-test class is present. Entries whose path can't be translated
  into a class name are ignored.

  Args:
    diff_entries: list of new / modified entries

  Returns:
    a string message
  """

  default_msg = "files"
  entries_and_names = [
      (e, OjluniFinder.translate_ojluni_path_to_class_name(e.dst_path))
      for e in diff_entries]

  # Non-test entries
  important_entries: List[tuple[ExpectedUpstreamEntry, str]] = [
      t for t in entries_and_names
      if t[1] is not None and not t[1].startswith("test.")]
  if not important_entries:
    # Test entries
    important_entries = [t for t in entries_and_names if t[1] is not None and
                         t[1].startswith("test.")]
    # no path is under OJLUNI_JAVA_BASE_PATH or OJLUNI_TEST_PATH
    if not important_entries:
      return default_msg

  # Get ref if all entries come from the same OpenJDK revision
  first_ref = important_entries[0][0].git_ref
  same_ref = all(entry.git_ref == first_ref for entry, _ in important_entries)
  git_ref = first_ref if same_ref else None

  if len(important_entries) == 1:
    # A single class: use its simple name.
    classes_summary = important_entries[0][1].split(".")[-1]
  else:
    # Character-wise common prefix of the fully-qualified class names.
    common_prefix = os.path.commonprefix(
        [name for _, name in important_entries])
    prefix_split = common_prefix.split(".")

    # short java package, e.g. javax. or java.n, doesn't provide meaningful
    # commit summary.
    if len(prefix_split) <= 2:
      classes_summary = default_msg
    else:
      # Assume that package name isn't title-case.
      is_package = (not prefix_split[-1] or prefix_split[-1][0].islower())
      if is_package:
        # Discard the prefix after the last "."
        classes_summary = ".".join(prefix_split[:-1])
      else:
        classes_summary = common_prefix + "*"

  if git_ref is None:
    return classes_summary

  # Drop everything up to and including the first "/" in the ref.
  abbv_ref = git_ref.split("/", 1)[-1]
  return f"{classes_summary} from {abbv_ref}"


def create_commit_at_expected_upstream(
    repo: Repo, head: Head, new_entries: List[ExpectedUpstreamEntry],
    bug_id: Optional[str], last_expected_change_id: Optional[str]) -> Head:
  r"""Create a new commit importing the given files at the head.

  Args:
    repo: the repository object
    head: the temp expected_upstream branch
    new_entries: a list of entries
    bug_id: bug id, or None to omit the "Bug:" line
    last_expected_change_id: Gerrit's change Id, or None / empty to omit the
      "Change-Id:" line

  Returns:
    the head after it has been moved to the new commit
  """
  dst_paths = [e.dst_path for e in new_entries]
  str_dst_paths = "\n ".join(dst_paths)

  for entry in new_entries:
    ref = entry.git_ref
    upstream_commit = repo.commit(ref)
    src_blob = upstream_commit.tree[entry.src_path]
    # Write into the file system directly because GitPython provides no API
    # writing into the index in memory. IndexFile.move doesn't help here,
    # because the API requires the file on the working tree too.
    # However, it's fine, because we later reset the HEAD.
    absolute_dst_path = Path(LIBCORE_DIR, entry.dst_path)
    absolute_dst_path.parent.mkdir(parents=True, exist_ok=True)
    with absolute_dst_path.open("wb") as file:
      file.write(src_blob.data_stream.read())

  entries = ExpectedUpstreamFile(head.commit.tree["EXPECTED_UPSTREAM"]
                                 .data_stream.read()).read_all_entries()
  entries = overlay_entries(entries, new_entries)
  # Write the entries to the file system.
  ExpectedUpstreamFile().sort_and_write_all_entries(entries)

  index = IndexFile.from_tree(repo, head.commit)
  index.add("EXPECTED_UPSTREAM")
  for entry in new_entries:
    index.add(entry.dst_path)

  summary_msg = create_commit_summary(new_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_expected_change_id:
    change_id_str = f"\nChange-Id: {last_expected_change_id}"
  msg = MSG_FIRST_COMMIT.format(summary=summary_msg, files=str_dst_paths,
                                bug=str_bug, change_id_str=change_id_str)
  # head=False: create the commit without moving HEAD; only the temp branch
  # is pointed at it below.
  commit = index.commit(message=msg, parent_commits=[head.commit], head=False)
  new_head = head.set_commit(commit)

  print(f"Create a new commit {commit.hexsha} at {head.name}")

  return new_head


def overlay_entries(
    existing_entries: List[ExpectedUpstreamEntry],
    new_entries: List[ExpectedUpstreamEntry]) -> List[ExpectedUpstreamEntry]:
  r"""Return a list of entries after overlaying the new_entries.

  Entries are keyed by dst_path. A new entry replaces an existing entry with
  the same dst_path in place; other new entries are appended.

  Args:
    existing_entries: current entries
    new_entries: entries being overlaid
  Returns:
    a list of entries
  """
  entries_map = {e.dst_path: e for e in existing_entries}
  entries_map.update((e.dst_path, e) for e in new_entries)
  return list(entries_map.values())


REGEX_CHANGE_ID = r"^Change-Id: (I[0-9a-f]+)$"
REGEX_BUG_ID = r"^Bug: ([0-9]+)$"


def extract_change_id(commit: Commit) -> Optional[str]:
  r"""Extract gerrit's Change-Id from a commit message.

  Args:
    commit: commit

  Returns:
    Change-Id, or None if the message has no Change-Id line
  """
  result = re.search(REGEX_CHANGE_ID, commit.message, re.M)
  return result.group(1) if result else None


def extract_bug_id(commit: Commit) -> Optional[str]:
  r"""Extract the bug id from a commit message.

  Args:
    commit: commit

  Returns:
    Buganizer Id, or None if the message has no Bug line
  """
  result = re.search(REGEX_BUG_ID, commit.message, re.M)
  return result.group(1) if result else None


def get_diff_entries(
    repo: Repo,
    base_expected_commit: Commit) -> Optional[List[ExpectedUpstreamEntry]]:
  """Get a list of entries different from the head commit.

  Validate EXPECTED_UPSTREAM file and return the list of
  modified or new entries between the working tree and HEAD.

  Args:
    repo: Repo
    base_expected_commit: the base commit

  Returns:
    a list of entries, or None when there is nothing to do or the working
    tree isn't in the expected state (the reason is printed)
  """
  current_tracking_branch = repo.active_branch.tracking_branch()
  # tracking_branch() is None when the active branch tracks nothing; report
  # it with the same message rather than failing on the attribute access.
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/master"):
    print("This script should only run on aosp/master branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}", file=sys.stderr)
    return None

  print("Reading EXPECTED_UPSTREAM file...")
  head_commit = repo.head.commit
  # diff(None) compares HEAD against the working tree.
  diff_index = head_commit.diff(None)
  no_file_change = len(diff_index)
  if no_file_change == 0:
    print("Can't find any EXPECTED_UPSTREAM file change", file=sys.stderr)
    return None
  elif no_file_change > 1 or diff_index[0].a_rawpath != b"EXPECTED_UPSTREAM":
    print("Expect modification in the EXPECTED_UPSTREAM file only.\n"
          "Please remove / commit the other changes. The below file changes "
          "are detected: ", file=sys.stderr)
    print_diff_index(diff_index, file=sys.stderr)
    return None

  prev_file = ExpectedUpstreamFile(head_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile()
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)

  outdated_entries = validate_and_remove_updated_entries(
      diff_entries, repo, base_expected_commit)

  if not outdated_entries:
    print("No need to update. All files are updated.")
    return None

  print("The following entries will be updated from upstream")
  for e in outdated_entries:
    print(f" {e.dst_path}")

  # NOTE(review): every new / modified entry is returned, not only the
  # outdated ones printed above -- presumably so that all changed
  # EXPECTED_UPSTREAM entries get recorded; confirm this is intended.
  return diff_entries


def compute_absorbed_diff_entries(
    repo: Repo, base_commit: Commit, commit: Commit, overlaid_entries: List[
        ExpectedUpstreamEntry]) -> List[ExpectedUpstreamEntry]:
  r"""Compute the combined entries after absorbing the new changes.

  Args:
    repo: Repo
    base_commit: the base commit in the expected_upstream
    commit: The commit diff-ed against from the base_commit
    overlaid_entries: Additional entries overlaid on top of the diff.

  Returns:
    Combined diff entries
  """
  prev_file = ExpectedUpstreamFile(base_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile(commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)
  diff_entries = overlay_entries(diff_entries, overlaid_entries)
  return validate_and_remove_updated_entries(diff_entries, repo, base_commit)


def main_run(
    repo: Repo, expected_upstream_base: Optional[str],
    bug_id: Optional[str], use_rerere: bool, is_absorbed: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM.

  Args:
    repo: Repo
    expected_upstream_base: the base commit in the expected_upstream branch.
      Defaults to "aosp/expected_upstream" when None and not absorbing.
    bug_id: bug id
    use_rerere: Reuses the recorded resolution from git
    is_absorbed: Absorb the new changes from EXPECTED_UPSTREAM into the
      existing commits created by this script

  Raises:
    Exception: whatever GitPython raises when expected_upstream_base can't be
      resolved; a "not found" message is printed to stderr first.
  """
  last_master_commit = repo.head.commit
  last_master_change_id = None
  last_expected_change_id = None
  last_expected_commit = None
  if is_absorbed:
    head = repo.head
    if len(head.commit.parents) != 2:
      print("Error: HEAD isn't a merge commit.", file=sys.stderr)
      return

    last_branch = None
    for commit in head.commit.parents:
      name_rev: list[str] = commit.name_rev.split(" ", 1)
      if (len(name_rev) > 1 and  # name_rev[1] is usually the branch name
          name_rev[1].startswith(TEMP_EXPECTED_BRANCH_PREFIX)):
        last_branch = name_rev[1]
        last_expected_commit = commit
      else:
        last_master_commit = commit

    if last_branch is None:
      print("Error: Can't find the last commit in the expected_upstream "
            "branch.", file=sys.stderr)
      return

    if len(last_expected_commit.parents) != 1:
      # Reported on stderr like the other errors in this function.
      print(f"Error: The head commit at {last_branch} isn't in the expected "
            f"state.", file=sys.stderr)
      return

    base_expected_branch_commit = last_expected_commit.parents[0]
    last_expected_change_id = extract_change_id(last_expected_commit)
    last_master_change_id = extract_change_id(head.commit)
    if bug_id is None:
      bug_id = extract_bug_id(last_expected_commit)
  else:
    if expected_upstream_base is None:
      expected_upstream_base = "aosp/expected_upstream"
    try:
      base_expected_branch_commit = repo.commit(expected_upstream_base)
    except Exception:
      # Print the hint, then let the original lookup error propagate. A
      # `finally` clause can't be used for this: the variable is unbound when
      # repo.commit() raises, so reading it would raise UnboundLocalError.
      print(f"{expected_upstream_base} is not found in this repository.",
            file=sys.stderr)
      raise

  diff_entries = get_diff_entries(repo, base_expected_branch_commit)
  if not diff_entries:
    return

  if is_absorbed:
    diff_entries = compute_absorbed_diff_entries(
        repo, base_expected_branch_commit, last_expected_commit, diff_entries)

  create_commit_staging_diff(repo)

  branch_name = create_random_branch_name()
  new_branch = repo.create_head(branch_name, base_expected_branch_commit.hexsha)
  new_branch.set_tracking_branch(repo.remotes.aosp.refs.expected_upstream)
  new_branch = create_commit_at_expected_upstream(
      repo, new_branch, diff_entries, bug_id, last_expected_change_id)

  # Clean the working tree before merging branch
  repo.head.reset(commit=last_master_commit, working_tree=True)
  for e in diff_entries:
    # Files that aren't in the reset HEAD tree were written by the import
    # step above; remove them from the working tree.
    if not has_file_in_tree(e.dst_path, repo.head.commit.tree):
      path = Path(LIBCORE_DIR, e.dst_path)
      path.unlink(missing_ok=True)

  dst_paths = [e.dst_path for e in diff_entries]
  str_dst_paths = "\n ".join(dst_paths)
  summary_msg = create_commit_summary(diff_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_master_change_id:
    change_id_str = f"\nChange-Id: {last_master_change_id}"
  msg = MSG_SECOND_COMMIT.format(
      summary=summary_msg, files=str_dst_paths, bug=str_bug,
      change_id_str=change_id_str)
  rerere_str = "rerere.enabled=" + ("true" if use_rerere else "false")

  # Run git-merge command here, and will let the user to handle
  # any errors and merge conflicts
  try:
    repo.git.execute(["git", "-c", rerere_str, "merge",
                      new_branch.commit.hexsha, "-m", msg])
  except GitCommandError as err:
    print(f"Error: {err}", file=sys.stderr)


def create_random_branch_name():
  """Return TEMP_EXPECTED_BRANCH_PREFIX plus 10 random [a-z0-9] characters."""
  rand_suffix = "".join(random.choice(string.ascii_lowercase +
                                      string.digits) for _ in range(10))
  return f"{TEMP_EXPECTED_BRANCH_PREFIX}{rand_suffix}"


def print_diff_index(index: DiffIndex, file=sys.stdout) -> None:
  """Print the a_rawpath of every diff in the index, one per line.

  Args:
    index: the diffs to print
    file: the stream to print to
  """
  for diff in index:
    print(f" {diff.a_rawpath}", file=file)


def main(argv: Sequence[str]) -> None:
  """Parse the command line arguments and run the import.

  Args:
    argv: the command line arguments, without the program name
  """
  arg_parser = argparse.ArgumentParser(
      description="Read the EXPECTED_UPSTREAM and update the files from the "
                  "OpenJDK. This script imports the files from OpenJDK into "
                  "the expected_upstream branch and merges it into the "
                  "current branch.")
  arg_parser.add_argument(
      "-a", "--absorbed-to-last-merge", action="store_true",
      help="Import more files but absorb them into the last commits created "
           "by this script.")
  arg_parser.add_argument(
      "--disable-rerere", action="store_true",
      help="Do not re-use the recorded resolution from git.")
  arg_parser.add_argument(
      "-b", "--bug", nargs="?",
      help="Buganizer Id")
  arg_parser.add_argument(
      "-e", "--expected_upstream_base", nargs="?",
      help="The base commit in the expected_upstream branch")

  args = arg_parser.parse_args(argv)

  bug_id = args.bug
  expected_upstream_base = args.expected_upstream_base
  use_rerere = not args.disable_rerere
  is_absorbed = args.absorbed_to_last_merge
  # In absorb mode the base commit is derived from the last merge commit, so
  # an explicit base is rejected.
  if is_absorbed and expected_upstream_base is not None:
    print("Error: -a and -e options can't be used together.", file=sys.stderr)
    return

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    main_run(repo, expected_upstream_base, bug_id, use_rerere, is_absorbed)
  finally:
    repo.close()


if __name__ == "__main__":
  main(sys.argv[1:])