• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python3 -B
2
3# Copyright 2022 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Read the EXPECTED_UPSTREAM and  merge the files from the upstream."""
18import argparse
19import datetime
20import logging
21# pylint: disable=g-importing-member
22import os.path
23from pathlib import Path
24import random
25import re
26import string
27import sys
28from typing import List
29from typing import Sequence
30
31# pylint: disable=g-multiple-import
32from common_util import (
33    ExpectedUpstreamEntry,
34    ExpectedUpstreamFile,
35    has_file_in_tree,
36    LIBCORE_DIR,
37    OjluniFinder,
38)
39
40from git import (
41    Commit,
42    DiffIndex,
43    GitCommandError,
44    Head,
45    IndexFile,
46    Repo,
47)
48
# Configure the root logger at INFO level so that the errors GitPython
# reports through the logging module are emitted.
logging.basicConfig(level=logging.INFO)
51
52
def validate_and_remove_updated_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo, commit: Commit) -> List[ExpectedUpstreamEntry]:
  """Returns a list of entries of which the file content needs to be updated.

  Every entry is validated by resolving its git ref and its source path in
  the git database. An entry is kept when its destination file is missing
  from the tree of the given commit, or when the destination content differs
  from the upstream source content.

  Args:
    entries: the entries to validate
    repo: the repository object
    commit: the commit whose tree holds the current destination files

  Returns:
    the out-of-date entries, in their original order

  Raises:
    Exception: the error raised while looking up an entry; the offending
      entry is printed to stderr before the error is re-raised.
  """
  commit_tree = commit.tree
  result: List[ExpectedUpstreamEntry] = []

  for e in entries:
    try:
      # Resolving the ref and the source path validates the entry. A separate
      # local name is used so that the `commit` parameter isn't clobbered.
      upstream_commit = repo.commit(e.git_ref)
      source_blob = upstream_commit.tree.join(e.src_path)
      if not has_file_in_tree(e.dst_path, commit_tree):
        # Add the entry if the file is missing in the given commit.
        result.append(e)
        continue

      dst_blob = commit_tree.join(e.dst_path)
      # Add the entry if the content is different. Git objects are
      # content-addressed, so comparing the object ids is equivalent to
      # comparing the data, without reading both files into memory.
      if source_blob.hexsha != dst_blob.hexsha:
        result.append(e)
    except Exception:
      # KeyboardInterrupt / SystemExit are deliberately not reported as a
      # broken entry.
      print(f"ERROR: reading entry: {e}", file=sys.stderr)
      raise

  return result
80
81
# Path of this script relative to LIBCORE_DIR. It is embedded in the message
# of the first commit below.
THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)

# Name prefix of the temporary branches created by this script.
TEMP_EXPECTED_BRANCH_PREFIX = "expected_upstream_"

# Message template of the commit importing the upstream files. {summary},
# {files}, {bug} and {change_id_str} are filled in later with str.format(),
# while THIS_TOOL_PATH is interpolated right here by the f-string. There is
# no newline before {change_id_str} because the callers pass either an empty
# string or a value starting with "\n".
MSG_FIRST_COMMIT = ("Import {summary}\n"
                    "\n"
                    "List of files:\n"
                    "  {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}\n"
                    "\n"
                    "{bug}\n"
                    "Test: N/A"
                    "{change_id_str}")

# Message template of the merge commit. It takes the same placeholders as
# MSG_FIRST_COMMIT.
MSG_SECOND_COMMIT = ("Merge {summary} into the "
                     "aosp/master branch\n"
                     "\n"
                     "List of files:\n"
                     "  {files}\n"
                     "\n"
                     "{bug}\n"
                     "Test: N/A"
                     "{change_id_str}")
106
107
def create_commit_staging_diff(repo: Repo) -> None:
  """Save the current EXPECTED_UPSTREAM file in a new git commit.

  The commit is created on top of the HEAD commit, but it is created with
  head=False, i.e. without advancing HEAD. The file can be retrieved from
  that commit later if this script fails.

  Args:
    repo: the repository object
  """
  head_commit = repo.head.commit
  staging_index = IndexFile.from_tree(repo, head_commit)
  staging_index.add("EXPECTED_UPSTREAM")

  timestamp = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
  staging_commit = staging_index.commit(
      message=f"Staging EXPECTED_UPSTREAM at {timestamp}",
      parent_commits=[head_commit],
      head=False)
  sha = staging_commit.hexsha

  print(
      f"The current EXPECTED_UPSTREAM file is saved in {sha}.\n"
      "If this script fails in the later stage, please retrieve the file by:\n"
      f"  git checkout {sha} -- EXPECTED_UPSTREAM")
128
129
def create_commit_summary(diff_entries: List[ExpectedUpstreamEntry]) -> str:
  """Create a short summary of the entries for the commit message.

  Only the entries of which the destination path translates into a class name
  are considered, and the test classes (names starting with "test.") are
  used only if there is no other class. A single class is summarized by its
  simple name, and multiple classes by their common package or their common
  name prefix followed by "*". If all the considered entries share the same
  git ref, " from <ref>" is appended, where the ref is abbreviated by
  dropping the part up to the first "/".

  Args:
    diff_entries: list of new / modified entries

  Returns:
    a string message, which is "files" if nothing more specific is found
  """
  fallback = "files"
  named_entries = [
      (entry, OjluniFinder.translate_ojluni_path_to_class_name(entry.dst_path))
      for entry in diff_entries]
  # Entries without a class name are not under the known ojluni paths.
  known = [pair for pair in named_entries if pair[1] is not None]

  # Prefer the non-test classes. If there is none, every known entry is a
  # test class, and all of them are used instead.
  selected = [p for p in known if not p[1].startswith("test.")] or known
  if not selected:
    return fallback

  # Keep the ref only if all entries come from the same OpenJDK revision.
  first_ref = selected[0][0].git_ref
  same_ref = all(entry.git_ref == first_ref for entry, _ in selected)
  git_ref = first_ref if same_ref else None

  class_names = [name for _, name in selected]
  if len(class_names) == 1:
    classes_summary = class_names[0].split(".")[-1]
  else:
    common_prefix = os.path.commonprefix(class_names)
    parts = common_prefix.split(".")
    if len(parts) <= 2:
      # A short java package, e.g. javax. or java.n, doesn't provide a
      # meaningful commit summary.
      classes_summary = fallback
    elif not parts[-1] or parts[-1][0].islower():
      # Assume that a package name isn't title-case. The prefix ends in the
      # middle of a package name, so discard the part after the last ".".
      classes_summary = ".".join(parts[:-1])
    else:
      classes_summary = common_prefix + "*"

  if git_ref is None:
    return classes_summary

  abbreviated_ref = git_ref.split("/", 1)[-1]
  return f"{classes_summary} from {abbreviated_ref}"
190
191
def create_commit_at_expected_upstream(
    repo: Repo, head: Head, new_entries: List[ExpectedUpstreamEntry],
    bug_id: str, last_expected_change_id: str) -> Head:
  """Create a new commit importing the given files at the head.

  The upstream files and the updated EXPECTED_UPSTREAM file are written into
  the working tree, added into an index created from the head commit, and
  committed on top of the head commit. The given head is then moved to the
  new commit.

  Args:
    repo: the repository object
    head: the temp expected_upstream branch
    new_entries: a list of entries to import
    bug_id: bug id, or None to leave the bug line empty
    last_expected_change_id: Gerrit's Change-Id put into the commit message.
      No Change-Id line is added if it's empty or None.

  Returns:
    the head as returned by head.set_commit() with the new commit
  """
  for entry in new_entries:
    upstream_tree = repo.commit(entry.git_ref).tree
    src_blob = upstream_tree[entry.src_path]
    # Write into the file system directly because GitPython provides no API
    # writing into the index in memory. IndexFile.move doesn't help here,
    # because the API requires the file on the working tree too.
    # However, it's fine, because the caller later resets the HEAD.
    target = Path(LIBCORE_DIR, entry.dst_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("wb") as out:
      out.write(src_blob.data_stream.read())

  base_commit = head.commit
  expected_upstream_blob = base_commit.tree["EXPECTED_UPSTREAM"]
  current_entries = ExpectedUpstreamFile(
      expected_upstream_blob.data_stream.read()).read_all_entries()
  merged_entries = overlay_entries(current_entries, new_entries)
  # Write the entries to the file system.
  ExpectedUpstreamFile().sort_and_write_all_entries(merged_entries)

  index = IndexFile.from_tree(repo, base_commit)
  index.add("EXPECTED_UPSTREAM")
  for entry in new_entries:
    index.add(entry.dst_path)

  if bug_id is None:
    str_bug = ""
  else:
    str_bug = f"Bug: {bug_id}"
  if last_expected_change_id:
    change_id_str = f"\nChange-Id: {last_expected_change_id}"
  else:
    change_id_str = ""
  msg = MSG_FIRST_COMMIT.format(
      summary=create_commit_summary(new_entries),
      files="\n  ".join([entry.dst_path for entry in new_entries]),
      bug=str_bug,
      change_id_str=change_id_str)

  commit = index.commit(message=msg, parent_commits=[base_commit], head=False)
  new_head = head.set_commit(commit)

  print(f"Create a new commit {commit.hexsha} at {head.name}")

  return new_head
247
248
def overlay_entries(
    existing_entries: List[ExpectedUpstreamEntry],
    new_entries: List[ExpectedUpstreamEntry]) -> List[ExpectedUpstreamEntry]:
  """Return a list of entries after overlaying the new_entries.

  The entries are keyed by dst_path. A new entry replaces the existing entry
  with the same dst_path and takes its position in the list. The other new
  entries are appended in the given order.

  Args:
    existing_entries: current entries
    new_entries: entries being overlaid

  Returns:
    a new list of entries; the given lists aren't modified
  """
  # dict keeps the insertion order, and overwriting a key keeps its position.
  entries_map = {e.dst_path: e for e in existing_entries}
  entries_map.update((e.dst_path, e) for e in new_entries)
  return list(entries_map.values())
268
269
# Matches a whole "Change-Id: I<lowercase hex digits>" line and captures the
# id. The extract_change_id() function searches with re.M, so that ^ and $
# match at the line boundaries of a commit message.
REGEX_CHANGE_ID = r"^Change-Id: (I[0-9a-f]+)$"
# Matches a whole "Bug: <digits>" line and captures the digits. It's used
# with re.M by extract_bug_id().
REGEX_BUG_ID = r"^Bug: ([0-9]+)$"
272
273
def extract_change_id(commit: Commit) -> str:
  """Find gerrit's Change-Id in a commit message.

  Args:
    commit: the commit of which the message is searched

  Returns:
    the first Change-Id in the message, or None if there is no such line
  """
  match = re.search(REGEX_CHANGE_ID, commit.message, flags=re.MULTILINE)
  if match is None:
    return None
  return match.group(1)
285
286
def extract_bug_id(commit: Commit) -> str:
  """Find the bug id in a commit message.

  Args:
    commit: the commit of which the message is searched

  Returns:
    the id in the first "Bug:" line, or None if there is no such line
  """
  match = re.search(REGEX_BUG_ID, commit.message, flags=re.MULTILINE)
  if match is None:
    return None
  return match.group(1)
298
299
def get_diff_entries(
    repo: Repo, base_expected_commit: Commit) -> List[ExpectedUpstreamEntry]:
  """Get a list of entries different from the head commit.

  Validate EXPECTED_UPSTREAM file and return the list of
  modified or new entries between the working tree and HEAD.

  Args:
    repo: Repo
    base_expected_commit: the base commit, against which the content of the
      files is compared

  Returns:
    a list of new / modified entries, or None if the script shouldn't
    proceed: the active branch doesn't track aosp/master, the working tree
    has a change other than a modification of EXPECTED_UPSTREAM, or all the
    files are already up-to-date. The reason is printed in each case.
  """
  current_tracking_branch = repo.active_branch.tracking_branch()
  # tracking_branch() returns None if the active branch has no upstream.
  # Report it like any other wrong branch instead of crashing on `.name`.
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/master"):
    print("This script should only run on aosp/master branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}", file=sys.stderr)
    return None

  print("Reading EXPECTED_UPSTREAM file...")
  head_commit = repo.head.commit
  # diff(None) compares the commit against the working tree.
  diff_index = head_commit.diff(None)
  changed_file_count = len(diff_index)
  if changed_file_count == 0:
    print("Can't find any EXPECTED_UPSTREAM file change", file=sys.stderr)
    return None
  elif (changed_file_count > 1 or
        diff_index[0].a_rawpath != b"EXPECTED_UPSTREAM"):
    print("Expect modification in the EXPECTED_UPSTREAM file only.\n"
          "Please remove / commit the other changes. The below file changes "
          "are detected: ", file=sys.stderr)
    print_diff_index(diff_index, file=sys.stderr)
    return None

  prev_file = ExpectedUpstreamFile(head_commit.tree["EXPECTED_UPSTREAM"]
                                   .data_stream.read())
  curr_file = ExpectedUpstreamFile()
  diff_entries = prev_file.get_new_or_modified_entries(curr_file)

  outdated_entries = validate_and_remove_updated_entries(
      diff_entries, repo, base_expected_commit)

  if not outdated_entries:
    print("No need to update. All files are updated.")
    return None

  print("The following entries will be updated from upstream")
  for e in outdated_entries:
    print(f"  {e.dst_path}")

  # All new / modified entries are returned, not only the outdated ones.
  return diff_entries
352
353
def compute_absorbed_diff_entries(
    repo: Repo, base_commit: Commit, commit: Commit, overlaid_entries: List[
        ExpectedUpstreamEntry]) -> List[ExpectedUpstreamEntry]:
  """Compute the combined entries after absorbing the new changes.

  The entries new or modified in the EXPECTED_UPSTREAM file between
  base_commit and commit are overlaid with overlaid_entries, and then the
  entries of which the files are already up-to-date in base_commit are
  removed.

  Args:
    repo: Repo
    base_commit: the base commit in the expected_upstream
    commit: The commit diff-ed against from the base_commit
    overlaid_entries: Additional entries overlaid on top of the diff.

  Returns:
    Combined diff entries
  """
  base_content = base_commit.tree["EXPECTED_UPSTREAM"].data_stream.read()
  base_file = ExpectedUpstreamFile(base_content)
  absorbed_content = commit.tree["EXPECTED_UPSTREAM"].data_stream.read()
  absorbed_file = ExpectedUpstreamFile(absorbed_content)

  combined_entries = overlay_entries(
      base_file.get_new_or_modified_entries(absorbed_file), overlaid_entries)
  return validate_and_remove_updated_entries(
      combined_entries, repo, base_commit)
375
376
def main_run(
    repo: Repo, expected_upstream_base: str,
    bug_id: str, use_rerere: bool, is_absorbed: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM.

  The files are imported in a commit on a new temporary branch named with
  TEMP_EXPECTED_BRANCH_PREFIX, and that branch is then merged into the current
  branch with git-merge. Errors reported by git-merge are printed, and the
  user is expected to handle them and any merge conflicts.

  Args:
    repo: Repo
    expected_upstream_base: the base commit in the expected_upstream branch.
      "aosp/expected_upstream" is used if it's None. It's not used in the
      absorbed mode.
    bug_id: bug id. If it's None in the absorbed mode, the bug id is
      extracted from the last commit in the expected_upstream branch.
    use_rerere: Reuses the recorded resolution from git
    is_absorbed: Absorb the new changes from EXPECTED_UPSTREAM into the
      existing commits created by this script

  Raises:
    Exception: the error raised by repo.commit() if expected_upstream_base
      can't be resolved. A message is printed to stderr before it propagates.
  """
  last_master_commit = repo.head.commit
  last_master_change_id = None
  last_expected_change_id = None
  if is_absorbed:
    head = repo.head
    if len(head.commit.parents) != 2:
      print("Error: HEAD isn't a merge commit.", file=sys.stderr)
      return

    # Tell the 2 parents of the merge commit apart: the one named after a
    # temp branch of this script, and the other one on the master branch.
    last_branch = None
    last_expected_commit = None
    for commit in head.commit.parents:
      name_rev: list[str] = commit.name_rev.split(" ", 1)
      if (len(name_rev) > 1 and  # name_rev[1] is usually the branch name
          name_rev[1].startswith(TEMP_EXPECTED_BRANCH_PREFIX)):
        last_branch = name_rev[1]
        last_expected_commit = commit
      else:
        last_master_commit = commit

    if last_branch is None:
      print("Error: Can't find the last commit in the expected_upstream "
            "branch.", file=sys.stderr)
      return

    if len(last_expected_commit.parents) != 1:
      print(f"Error: The head commit at {last_branch} isn't in the expected "
            "state.", file=sys.stderr)
      return

    base_expected_branch_commit = last_expected_commit.parents[0]
    # Reuse the Change-Ids of the absorbed commits in the new commits.
    last_expected_change_id = extract_change_id(last_expected_commit)
    last_master_change_id = extract_change_id(head.commit)
    if bug_id is None:
      bug_id = extract_bug_id(last_expected_commit)
  else:
    if expected_upstream_base is None:
      expected_upstream_base = "aosp/expected_upstream"
    # Initialize the variable first. Otherwise, the check in the finally
    # block raises UnboundLocalError when the lookup fails, which hides the
    # real error and skips the message.
    base_expected_branch_commit = None
    try:
      base_expected_branch_commit = repo.commit(expected_upstream_base)
    finally:
      if base_expected_branch_commit is None:
        print(f"{expected_upstream_base} is not found in this repository.",
              file=sys.stderr)

  diff_entries = get_diff_entries(repo, base_expected_branch_commit)
  if not diff_entries:
    return

  if is_absorbed:
    diff_entries = compute_absorbed_diff_entries(
        repo, base_expected_branch_commit, last_expected_commit, diff_entries)

  # Save the modified EXPECTED_UPSTREAM file before the working tree is reset.
  create_commit_staging_diff(repo)

  branch_name = create_random_branch_name()
  new_branch = repo.create_head(branch_name, base_expected_branch_commit.hexsha)
  new_branch.set_tracking_branch(repo.remotes.aosp.refs.expected_upstream)
  new_branch = create_commit_at_expected_upstream(
      repo, new_branch, diff_entries, bug_id, last_expected_change_id)

  # Clean the working tree before merging branch
  repo.head.reset(commit=last_master_commit, working_tree=True)
  for e in diff_entries:
    # The reset doesn't remove the imported files that aren't in the tree of
    # the HEAD commit; delete them from the file system.
    if not has_file_in_tree(e.dst_path, repo.head.commit.tree):
      path = Path(LIBCORE_DIR, e.dst_path)
      path.unlink(missing_ok=True)

  dst_paths = [e.dst_path for e in diff_entries]
  str_dst_paths = "\n  ".join(dst_paths)
  summary_msg = create_commit_summary(diff_entries)
  str_bug = "" if bug_id is None else f"Bug: {bug_id}"
  change_id_str = ""
  if last_master_change_id:
    change_id_str = f"\nChange-Id: {last_master_change_id}"
  msg = MSG_SECOND_COMMIT.format(
      summary=summary_msg, files=str_dst_paths, bug=str_bug,
      change_id_str=change_id_str)
  rerere_str = "rerere.enabled="
  rerere_str += "true" if use_rerere else "false"

  # Run git-merge command here, and will let the user to handle
  # any errors and merge conflicts
  try:
    repo.git.execute(["git", "-c", rerere_str, "merge",
                      new_branch.commit.hexsha, "-m", msg])
  except GitCommandError as err:
    print(f"Error: {err}", file=sys.stderr)
478
479
def create_random_branch_name():
  """Return TEMP_EXPECTED_BRANCH_PREFIX followed by 10 random characters.

  The random characters are lowercase ASCII letters and digits.
  """
  alphabet = string.ascii_lowercase + string.digits
  suffix_chars = [random.choice(alphabet) for _ in range(10)]
  return TEMP_EXPECTED_BRANCH_PREFIX + "".join(suffix_chars)
484
485
def print_diff_index(index: DiffIndex, file=sys.stdout) -> None:
  """Print the a-side path of each diff, one indented path per line.

  Args:
    index: the diffs to be listed
    file: the stream to print to
  """
  for diff in index:
    # a_rawpath is a bytes object and would be printed as b'...'. Use the
    # decoded a_path for a readable list.
    print(f"  {diff.a_path}", file=file)
489
490
def main(argv: Sequence[str]) -> None:
  """Parse the command line options and run the import and the merge.

  Args:
    argv: the command line arguments without the program name
  """
  parser = argparse.ArgumentParser(
      description="Read the EXPECTED_UPSTREAM and update the files from the "
                  "OpenJDK. This script imports the files from OpenJDK into "
                  "the expected_upstream branch and merges it into the "
                  "current branch.")
  parser.add_argument(
      "-a", "--absorbed-to-last-merge", action="store_true",
      help="Import more files but absorb them into the last commits created "
           "by this script.")
  parser.add_argument(
      "--disable-rerere", action="store_true",
      help="Do not re-use the recorded resolution from git.")
  parser.add_argument(
      "-b", "--bug", nargs="?",
      help="Buganizer Id")
  parser.add_argument(
      "-e", "--expected_upstream_base", nargs="?",
      help="The base commit in the expected_upstream branch")
  options = parser.parse_args(argv)

  # The base commit is derived from the last merge in the absorbed mode, so
  # the two options are mutually exclusive.
  if (options.absorbed_to_last_merge and
      options.expected_upstream_base is not None):
    print("Error: -a and -e options can't be used together.", file=sys.stderr)
    return

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    main_run(
        repo,
        options.expected_upstream_base,
        options.bug,
        not options.disable_rerere,
        options.absorbed_to_last_merge)
  finally:
    repo.close()
526
527
# Run the tool only when this file is executed as a script. The program name
# in sys.argv[0] isn't passed to main().
if __name__ == "__main__":
  main(sys.argv[1:])
530