#!/usr/bin/env python3
# Copyright 2020 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Disconnect the history of files from a branch and reconnect it elsewhere.

Utility to disconnect history of files from a branch, and reconnect with base
on a different branch.
"""

import argparse
import collections
import subprocess
import sys

import filtered_utils
import lazytree
import utils


class CommitMetadataFactory(dict):
    """Dict-like cache that reads commit metadata on first access."""

    def __missing__(self, key):
        """Reads commit metadata for key, stores it, and returns it."""
        value = filtered_utils.get_metadata(key)
        self[key] = value
        return value


def disconnect(source_commit, ref_commit):
    """Creates a commit that disconnects files from source_commit.

    All files existing in ref_commit will be removed from source_commit.

    Args:
        source_commit: commit hash (str) to disconnect from.
        ref_commit: commit hash to be a file list reference.

    Returns:
        Whatever utils.git_commit returns for the newly created commit, whose
        only parent is source_commit.
    """
    source_files = utils.get_file_list(source_commit)
    ref_paths = {ref.path for ref in utils.get_file_list(ref_commit)}
    kept_files = [ref for ref in source_files if ref.path not in ref_paths]
    tree = utils.git_mktree(kept_files)
    return utils.git_commit(
        tree, [source_commit],
        message=b'Disconnect history from %s' % (
            source_commit.encode('ascii')))


def connect_base(current_commit, base_commit):
    """Creates a merge commit that takes files from base_commit.

    Literally it's identical to git merge base_commit in current_commit.

    Args:
        current_commit: commit hash to commit on top of.
        base_commit: commit hash (str) that contains the file histories.

    Returns:
        Whatever utils.git_commit returns for the new commit, whose parents
        are current_commit and base_commit, in that order.
    """
    current_files = utils.get_file_list(current_commit)
    base_files = utils.get_file_list(base_commit)
    # The tree is the plain concatenation of both file lists.
    tree = utils.git_mktree(current_files + base_files)
    return utils.git_commit(
        tree, [current_commit, base_commit],
        message=b'Connect history with base %s' % (
            base_commit.encode('ascii')))


def blame_files(commithash, files):
    """Blames files on the given commithash.

    Args:
        commithash: commit to run the blame on.
        files: iterable of file paths to blame.

    Returns:
        A dict from path to the result of utils.git_blame for that path.
    """
    return {path: utils.git_blame(commithash, path) for path in files}


def search_blame_line(blames, amend_commits, target_commit_hash):
    """Searches blames matching target_commit_hash in amend_commits.

    Returns a map from file path to a list of tuples, each tuple has
    len(amend_commits) + 1 elements. The 0-th element is the line in blames,
    and the 1st to n-th elements are the corresponding lines in the
    amend_commits blames.

    Lines are paired purely by position (zip), so the pairing stops at the
    shortest of the blames involved.

    Args:
        blames: a dict from path to list of GitBlameLine, for files blamed on
            target_commit_hash.
        amend_commits: a list of commit hashes to provide actual history.
        target_commit_hash: commit hash that blames are blamed on.
    """
    blames_combined = {}
    for blame_file_path, blame_file in blames.items():
        blames_amend = [
            utils.git_blame(commit, blame_file_path)
            for commit in amend_commits
        ]
        # Keep only the lines that the goal blame attributes to the target
        # commit itself.
        blames_combined[blame_file_path] = [
            blame_combined
            for blame_combined in zip(blame_file, *blames_amend)
            if blame_combined[0].commit == target_commit_hash
        ]
    return blames_combined


def get_track_from_blames(blames_combined, virtual_goal_commit, amend_commits,
                          commit_choice_cache, commit_msg_cache):
    """Blames diffs and locates the amend commits.

    For every line, the candidate commits are the commits that each amend
    blame attributes the line to. The user is asked interactively which
    candidate to use; the answer is remembered per distinct candidate tuple.

    Returns a tuple containing:
     - a set of commit hashes in amend_commits tree;
     - a line-by-line mapping for files in diff to commit hashes in
       amend_commits tree of diffed lines.

    Args:
        blames_combined: a map from path to a list of tuples. Each tuple
            reflects one line, and has len(amend_commits)+1 elements. See
            more details in search_blame_line.
        virtual_goal_commit: a commit that contains no useful history for
            diffs. Not used by this function.
        amend_commits: list of HEAD commit hashes that refer to trees that can
            amend the diffs. Not used by this function.
        commit_choice_cache: caches user choice on which amend commit to use.
            Updated in place.
        commit_msg_cache: caches commit metadata.
    """
    blame_untracked_lines = {}
    commits_to_track = set()

    for blame_file_path, blame_lines in blames_combined.items():
        blame_untracked_lines[blame_file_path] = []
        for blame_line in blame_lines:
            # Element 0 is the goal line; the rest are the amend blame lines.
            original_commits = tuple(
                blame_amend.commit for blame_amend in blame_line[1:])
            chosen = commit_choice_cache.get(original_commits)
            if chosen is None:
                for idx, original_commit in enumerate(original_commits):
                    print('%d: %s' %
                          (idx, commit_msg_cache[original_commit].title))
                # No validation on user_choice since no untrusted user.
                # Also the developer can rerun if entered wrongly by accident.
                user_choice = int(input('Choose patch: '))
                chosen = original_commits[user_choice]
                commit_choice_cache[original_commits] = chosen
            commits_to_track.add(chosen)
            blame_untracked_lines[blame_file_path].append(
                (blame_line[0], chosen))

    return commits_to_track, blame_untracked_lines


def reconstruct_file(blame_goal, blame_base, lines_to_reconstruct,
                     virtual_goal_commit):
    """Reconstructs a file to reflect changes in lines_to_reconstruct.

    Walks blame_base and blame_goal in parallel. Lines that are identical on
    both sides are kept, goal lines listed in lines_to_reconstruct are taken
    from goal, and base lines directly following such taken lines are treated
    as replaced and dropped. Other goal-only changes are ignored.

    Returns a binary for the new file content, each line terminated by b'\\n'.

    Args:
        blame_goal: a list of utils.GitBlameLine blaming the file on
            virtual_goal_commit.
        blame_base: a list of utils.GitBlameLine blaming the file on the last
            committed commit.
        lines_to_reconstruct: only to reconstruct these lines, instead of
            everything in blame_goal. It is represented in a list of
            GitBlameLine.
        virtual_goal_commit: commit hash where blame_goal is based on.
    """
    idx_base, idx_goal = 0, 0
    reconstructed_file = []
    # Initialised up front: the fallback branch below reads this flag and may
    # be the first branch taken, before any other branch has assigned it.
    should_skip_base = False

    print('Changed lines are', [line.data for line in lines_to_reconstruct])
    line_iter = iter(lines_to_reconstruct)
    line = next(line_iter, None)
    while idx_base < len(blame_base) or idx_goal < len(blame_goal):
        # Either side may already be exhausted while the other still has
        # lines; every index access below is guarded by these flags.
        has_base = idx_base < len(blame_base)
        has_goal = idx_goal < len(blame_goal)

        # Both sides are identical. We can't compare blame_base, and line
        # directly due to blame commit difference could end up different
        # lineno.
        if (has_base and has_goal and
                blame_base[idx_base].data == blame_goal[idx_goal].data and
                blame_base[idx_base].commit == blame_goal[idx_goal].commit):
            # We append this line if both sides are identical.
            reconstructed_file.append(blame_base[idx_base].data)
            idx_base += 1
            idx_goal += 1
            should_skip_base = False
        elif has_goal and line and blame_goal[idx_goal] == line:
            # We append the line from goal, if blame_goal[idx_goal] is the
            # line we're interested in.
            reconstructed_file.append(line.data)
            line = next(line_iter, None)
            idx_goal += 1
            should_skip_base = True
        elif has_goal and blame_goal[idx_goal].commit == virtual_goal_commit:
            # We skip the line from goal, if the change is not in the commit
            # we're interested in. Thus, changed lines in other commits will
            # not be reflected.
            idx_goal += 1
        elif has_base:
            # We should skip base if we just appended some lines from goal.
            # This would treat modified lines and append first and skip later.
            # If we didn't append something from goal, lines from base should
            # be preserved because the modified lines are not in the commit
            # we're currently interested in.
            if not should_skip_base:
                reconstructed_file.append(blame_base[idx_base].data)
            idx_base += 1
        else:
            # Base is exhausted and the goal line is neither a tracked line
            # nor a virtual_goal_commit change: there is nothing left to pair
            # it with, so advance goal to guarantee the loop terminates.
            idx_goal += 1

    return b''.join(data + b'\n' for data in reconstructed_file)


def reconstruct_files(track_commit, blame_untracked_lines, blames,
                      current_base_commit, virtual_goal_commit):
    """Reconstructs files to reflect changes in track_commit.

    Returns a map from file path to file content for reconstructed files.
    Only files with at least one line assigned to track_commit are included.

    Args:
        track_commit: commit hash to track, and reconstruct from.
        blame_untracked_lines: a line-by-line mapping regarding selected amend
            commits for diffs. See get_track_from_blames for more.
        blames: a map from filename to list of utils.GitBlameLine.
        current_base_commit: commit hash for HEAD of base that contains base
            history + already committed amend history.
        virtual_goal_commit: commit hash for one giant commit that has no
            history. virtual_goal_commit is one commit ahead of
            current_base_commit.
    """
    # Group the goal lines that were assigned to track_commit by file.
    lines_to_track = collections.defaultdict(list)
    for file, lines in blame_untracked_lines.items():
        for goal_line, chosen_commit in lines:
            if chosen_commit == track_commit:
                lines_to_track[file].append(goal_line)

    constructed_files = {}
    for current_file, current_file_lines in lines_to_track.items():
        print('Reconstructing', current_file, 'for', track_commit)
        blame_base = utils.git_blame(current_base_commit, current_file)
        constructed_files[current_file] = reconstruct_file(
            blames[current_file], blame_base, current_file_lines,
            virtual_goal_commit)
    return constructed_files


def main():
    """Parses the command line and rebuilds history one commit at a time."""
    # Init args
    parser = argparse.ArgumentParser(description='Reconnect git history')
    parser.add_argument(
        'disconnect_from',
        metavar='disconnect_from',
        type=str,
        nargs=1,
        help='disconnect history from this commit')
    parser.add_argument(
        'base_commit',
        metavar='base_commit',
        type=str,
        nargs=1,
        help='base commit to use the history')
    parser.add_argument(
        'amend_commits',
        metavar='amend_commits',
        type=str,
        nargs='+',
        help='commits to amend histories from base_commit')

    arg = parser.parse_args(sys.argv[1:])
    empty_commit = disconnect(arg.disconnect_from[0], arg.base_commit[0])
    connected_base = connect_base(empty_commit, arg.base_commit[0])

    commit_msg_cache = CommitMetadataFactory()
    commit_choice_cache = {}
    last_commit = connected_base
    # In each iteration of the loop, it
    #  - re-creates the new goal commit, (base + committed history + (one
    #    giant) uncommitted history).
    #  - blames on new goal commit and tot of amend commits. Maps line-by-line
    #    from uncommitted to past histories.
    #  - chooses one of the past commits, reconstructs files to reflect
    #    changes in that commit, and creates a new commit.
    # last_commit, commit_msg_cache, commit_choice_cache will be persistent
    # across iterations.
    while True:
        # One commit is processed per iteration.

        # Create virtual target commit, and its diff.
        virtual_goal = utils.git_commit(arg.disconnect_from[0] + '^{tree}',
                                        [last_commit])
        diffs = utils.git_difftree(None, virtual_goal)
        if not diffs:
            print('No diffs are found between %s and goal.' %
                  (last_commit.decode('ascii'),))
            break

        blames = blame_files(virtual_goal,
                             [diff.file.path for diff in diffs])
        blames_combined = search_blame_line(blames, arg.amend_commits,
                                            virtual_goal)

        commits_to_track, blame_untracked_lines = get_track_from_blames(
            blames_combined, virtual_goal, arg.amend_commits,
            commit_choice_cache, commit_msg_cache)
        if not commits_to_track:
            print('no commits to track, stopping')
            break

        # Stably choose one commit from commits_to_track, and reconstruct it.
        track_commit = min(commits_to_track)
        print('Reconstructing commit %s: %s' %
              (track_commit, commit_msg_cache[track_commit].title))
        constructed_files = reconstruct_files(track_commit,
                                              blame_untracked_lines, blames,
                                              last_commit, virtual_goal)

        # Mktree and commit with re-constructed_files.
        tree = lazytree.LazyTree(filtered_utils.get_metadata(last_commit).tree)
        for filename, filedata in constructed_files.items():
            # --stdin hashes the piped bytes as-is and does not depend on a
            # /dev/stdin device node existing on the host.
            blob = subprocess.check_output(
                ['git', 'hash-object', '-w', '--stdin'],
                input=filedata).strip()
            tree[filename] = utils.GitFile(filename, tree[filename].mode, blob)

        # Reuse the tracked commit's message and authorship for the new one.
        meta = commit_msg_cache[track_commit]
        last_commit = utils.git_commit(
            tree.hash(), [last_commit],
            (meta.message + b'\n(Reconstructed from ' + track_commit + b')\n'),
            dict(
                GIT_AUTHOR_NAME=meta.authorship.name,
                GIT_AUTHOR_EMAIL=meta.authorship.email,
                GIT_AUTHOR_DATE=b' '.join(
                    [meta.authorship.time, meta.authorship.timezone])))
        print('Reconstructed as', last_commit)

    # Make last commit for history reconstruction.
    print(
        utils.git_commit(
            filtered_utils.get_metadata(arg.disconnect_from[0]).tree,
            [last_commit],
            b'Finished history reconstruction\n\nRemoving unnecessary lines\n'))


if __name__ == '__main__':
    main()