#!/usr/bin/env python3
# Copyright 2021 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Run an equivalent to the backfill pipeline locally and generate diffs.

Parse the actual current builder configurations from BuildBucket and run
the join_config_payloads.py script locally. Generate a diff that shows any
changes using the tip-of-tree code vs what's running in production.
"""

import argparse
import collections
import contextlib
import functools
import itertools
import json
import logging
import multiprocessing
import multiprocessing.pool
import os
import pathlib
import shutil
import subprocess
import sys
import tempfile
import time

from common import utilities

# Resolve directories relative to this file so the script works from any CWD.
this_dir = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
hwid_path = (this_dir / "../../platform/chromeos-hwid/v3").resolve()
join_script = (this_dir / "../payload_utils/join_config_payloads.py").resolve()
merge_script = (this_dir / "../payload_utils/aggregate_messages.py").resolve()
public_path = (this_dir / "../../overlays").resolve()
private_path = (this_dir / "../../private-overlays").resolve()
project_path = (this_dir / "../../project").resolve()

# record to store backfiller configuration in
BackfillConfig = collections.namedtuple('BackfillConfig', [
    'program',
    'project',
    'hwid_key',
    'public_model',
    'private_repo',
    'private_model',
])


def parse_build_property(build, name):
    """Parse out a property value from a build and return its value.

    Properties are always JSON values, so we decode them and return the
    resulting object.

    Args:
        build (dict): json object containing BuildBucket properties
        name (str): name of the property to look up

    Return:
        decoded property value or None if not found
    """
    return json.loads(build["config"]["properties"]).get(name)


def run_backfill(config, args, logname=None, run_imported=True,
                 run_joined=True):
    """Run a single backfill job, return diff of current and new output.

    Args:
        config: BackfillConfig instance for the backfill operation.
        args: Commandline arguments.
        logname: Filename to redirect stderr to from backfill; default is
            to suppress the output.
        run_imported: If True, generate a diff for the imported payload.
        run_joined: If True, generate a diff for the joined payload.

    Return:
        A tuple ("program-project", imported_diff, joined_diff), or None if
        the program is skipped entirely.
    """

    # reef/fizz are currently broken because it _needs_ a real portage
    # environment to pull in common code.
    # TODO(https://crbug.com/1144956): fix when reef is corrected
    # NOTE: checked before any resources are opened so nothing leaks on the
    # early return.
    if config.program in ["reef", "fizz"]:
        return None

    # path to project repo and config bundle
    path_repo = project_path / config.program / config.project
    path_config = path_repo / "generated/config.jsonproto"

    with contextlib.ExitStack() as stack:
        # BUG FIX: the previous code used subprocess.DEVNULL (an int) when no
        # logname was given, which crashed on logfile.write() below. Opening
        # os.devnull keeps a uniform file interface for both .write() and
        # subprocess stderr redirection, and ExitStack guarantees closure.
        logfile = stack.enter_context(open(logname or os.devnull, "a"))

        def run_diff(cmd, current, output):
            """Execute cmd and diff the current and output files."""
            logfile.write("running: {}\n".format(" ".join(map(str, cmd))))
            subprocess.run(cmd, stderr=logfile, check=True)

            # if one or the other file doesn't exist, return the other as
            # a diff
            if current.exists() != output.exists():
                existing = current if current.exists() else output
                return existing.read_text()

            # otherwise run diff
            return utilities.jqdiff(current, output)

        cmd = [join_script, "--l", "DEBUG"]
        cmd.extend(["--program-name", config.program])
        cmd.extend(["--project-name", config.project])
        if path_config.exists():
            cmd.extend(["--config-bundle", path_config])
        if config.hwid_key:
            cmd.extend(["--hwid", hwid_path / config.hwid_key])
        if config.public_model:
            cmd.extend(["--public-model", public_path / config.public_model])
        if config.private_model:
            # the overlay name is the final component of the repo path
            overlay = config.private_repo.split('/')[-1]
            cmd.extend([
                "--private-model",
                private_path / overlay / config.private_model,
            ])

        # create output directory if it doesn't exist
        if args.save_imported_payloads:
            os.makedirs(
                os.path.join(args.save_imported_payloads, config.project),
                exist_ok=True,
            )
        if args.save_joined_payloads:
            os.makedirs(
                os.path.join(args.save_joined_payloads, config.project),
                exist_ok=True,
            )

        # create temporary directory for output
        diff_imported = ""
        diff_joined = ""
        with tempfile.TemporaryDirectory() as scratch:
            scratch = pathlib.Path(scratch)

            old_imported_prefix = path_repo / "generated"
            if args.diff_imported_against:
                old_imported_prefix = pathlib.Path(
                    os.path.join(
                        args.diff_imported_against,
                        config.project,
                    ))

            # generate diff of imported payloads
            path_imported_old = old_imported_prefix / "imported.jsonproto"
            path_imported_new = scratch / "imported.jsonproto"
            if run_imported:
                diff_imported = run_diff(
                    cmd + ["--import-only", "--output", path_imported_new],
                    path_imported_old,
                    path_imported_new,
                )
                if args.save_imported_payloads:
                    shutil.copyfile(
                        path_imported_new,
                        os.path.join(
                            args.save_imported_payloads,
                            config.project,
                            "imported.jsonproto",
                        ),
                    )

            old_joined_prefix = path_repo / "generated"
            if args.diff_joined_against:
                old_joined_prefix = pathlib.Path(
                    os.path.join(
                        args.diff_joined_against,
                        config.project,
                    ))

            # generate diff of joined payloads; joining needs a config bundle
            if run_joined and path_config.exists():
                path_joined_old = old_joined_prefix / "joined.jsonproto"
                path_joined_new = scratch / "joined.jsonproto"
                diff_joined = run_diff(
                    cmd + ["--output", path_joined_new],
                    path_joined_old,
                    path_joined_new,
                )
                if args.save_joined_payloads:
                    shutil.copyfile(
                        path_joined_new,
                        os.path.join(
                            args.save_joined_payloads,
                            config.project,
                            "joined.jsonproto",
                        ),
                    )

    return ("{}-{}".format(config.program, config.project), diff_imported,
            diff_joined)


def _write_diff_report(path, diffs):
    """Write an über diff file: one labeled section per program-project.

    Args:
        path: output filename (truncated and rewritten).
        diffs: dict mapping "program-project" name to its diff text.
    """
    with open(path, "w") as ofile:
        for name, result in sorted(diffs.items()):
            ofile.write("## ---------------------\n")
            ofile.write("## diff for {}\n".format(name))
            ofile.write("\n")
            ofile.write(result + "\n")


def run_backfills(args, configs):
    """Run backfill pipeline for each builder in configs.

    Generate an über diff showing the changes that the current ToT
    join_config_payloads code would generate vs what's currently committed.
    Write the result to the output file specified on the command line.

    Args:
        args: command line arguments from argparse
        configs: list of BackfillConfig instances to execute

    Return:
        nothing
    """
    # create a logfile if requested
    kwargs = {}
    kwargs["run_joined"] = args.joined_diff is not None
    kwargs["args"] = args
    if args.logfile:
        # open and close the logfile to truncate it so backfills can append.
        # We can't pickle the file object and send it as an argument with
        # multiprocessing, so this is a workaround for that limitation
        with open(args.logfile, "w"):
            kwargs["logname"] = args.logfile

    nproc = 32
    nconfig = len(configs)

    imported_diffs = {}
    joined_diffs = {}
    with multiprocessing.Pool(processes=nproc) as pool:
        results = pool.imap_unordered(
            functools.partial(run_backfill, **kwargs), configs, chunksize=1)

        for ii, result in enumerate(results, 1):
            sys.stderr.write(
                utilities.clear_line("[{}/{}] Processing backfills".format(
                    ii, nconfig)))

            # reef/fizz jobs return None (skipped); drop them
            if result:
                key, imported, joined = result
                imported_diffs[key] = imported
                joined_diffs[key] = joined
    sys.stderr.write(utilities.clear_line("Processing backfills"))

    # generate final über diff showing all the changes
    _write_diff_report(args.imported_diff, imported_diffs)
    if args.joined_diff:
        _write_diff_report(args.joined_diff, joined_diffs)


def main():
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "--imported-diff",
        type=str,
        required=True,
        help="target file for diff on imported.jsonproto payload",
    )
    parser.add_argument(
        "--save-imported-payloads",
        type=str,
        help="target directory to save individual imported.jsonproto payloads",
    )
    parser.add_argument(
        "--diff-imported-against",
        type=str,
        help="source directory of individual imported.jsonproto payloads",
    )
    parser.add_argument(
        "--joined-diff",
        type=str,
        help="target file for diff on joined.jsonproto payload",
    )
    parser.add_argument(
        "--save-joined-payloads",
        type=str,
        help="target directory to save individual joined.jsonproto payloads",
    )
    parser.add_argument(
        "--diff-joined-against",
        type=str,
        help="source directory of individual joined.jsonproto payloads",
    )
    parser.add_argument(
        "-l",
        "--logfile",
        type=str,
        help="target file to log output from backfills",
    )
    args = parser.parse_args()

    # query BuildBucket for current builder configurations in the infra bucket
    data, status = utilities.call_and_spin(
        "Listing backfill builder",
        json.dumps({
            "id": {
                "project": "chromeos",
                "bucket": "infra",
                "builder": "backfiller"
            }
        }),
        "prpc",
        "call",
        "cr-buildbucket.appspot.com",
        "buildbucket.v2.Builders.GetBuilder",
    )
    if status != 0:
        print(
            "Error executing prpc call to list builders. "
            "Try 'prpc login' first.",
            file=sys.stderr,
        )
        sys.exit(status)
    builder = json.loads(data)

    # construct backfill config from the configured builder properties
    configs = []
    for builder_config in parse_build_property(builder, "configs"):
        config = BackfillConfig(
            program=builder_config["program_name"],
            project=builder_config["project_name"],
            hwid_key=builder_config.get("hwid_key"),
            public_model=builder_config.get("public_yaml_path"),
            private_repo=builder_config.get("private_yaml", {}).get("repo"),
            private_model=builder_config.get("private_yaml", {}).get("path"),
        )

        # skip projects whose repo isn't checked out locally; lazy %-style
        # args avoid formatting when the warning is filtered out
        path_repo = project_path / config.program / config.project
        if not path_repo.exists():
            logging.warning("%s/%s does not exist locally, skipping",
                            config.program, config.project)
            continue

        configs.append(config)

    run_backfills(args, configs)


if __name__ == "__main__":
    main()