.github/workflows/_bazel-build-test.yml

name: bazel

on:
  workflow_call:
    inputs:
      build-environment:
        required: true
        type: string
        description: Top-level label for what's being built/tested.
      docker-image-name:
        required: true
        type: string
        description: Name of the base docker image to build with.
      cuda-version:
        required: true
        type: string
        description: What CUDA version to build with (i.e. "11.7"), "cpu" for none.
      sync-tag:
        required: false
        type: string
        default: ""
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
      test-matrix:
        required: true
        type: string
        description: |
          A JSON description of what configs to run later on.

env:
  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}

jobs:
  filter:
    if: github.repository_owner == 'pytorch'
    runs-on: [self-hosted, linux.large]
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
      is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }}
      keep-going: ${{ steps.filter.outputs.keep-going }}
      reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
    steps:
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
        with:
          fetch-depth: 1
          submodules: false

      - name: Select all requested test configurations
        id: filter
        uses: ./.github/actions/filter-test-configs
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          test-matrix: ${{ inputs.test-matrix }}

  build-and-test:
    needs: filter
    # Don't run on forked repos.
    if: github.repository_owner == 'pytorch' && needs.filter.outputs.is-test-matrix-empty == 'False'
    strategy:
      matrix: ${{ fromJSON(needs.filter.outputs.test-matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

      - name: Calculate docker image
        id: calculate-docker-image
        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

      - name: Pull docker image
        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

      - name: Check if in a ARC runner
        shell: bash
        id: check_arc_runner
        run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.4
        if: ${{ inputs.cuda-version != 'cpu' && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}

      - name: Output disk space left
        run: |
          sudo df -H

      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
          env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"

      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py

      - name: Get workflow job id
        id: get-job-id
        uses: ./.github/actions/get-workflow-job-id
        if: always()
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Build
        env:
          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          GITHUB_WORKFLOW: ${{ github.workflow }}
          GITHUB_JOB: ${{ github.job }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
          GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
          JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
          REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
          # TODO duplicated
          AWS_DEFAULT_REGION: us-east-1
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          TORCH_CUDA_ARCH_LIST: 5.2
          DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
          OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
          CUDA_VERSION: ${{ inputs.cuda-version }}
        run: |
          export SHARD_NUMBER=0
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Make sure we copy test results from bazel-testlogs symlink to
          # a regular directory ./test/test-reports
          # shellcheck disable=SC2086
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e GITHUB_ACTIONS \
            -e GITHUB_REPOSITORY \
            -e GITHUB_WORKFLOW \
            -e GITHUB_JOB \
            -e GITHUB_RUN_NUMBER \
            -e GITHUB_RUN_ATTEMPT \
            -e JOB_ID \
            -e GIT_DEFAULT_BRANCH="$GIT_DEFAULT_BRANCH" \
            -e SHARD_NUMBER \
            -e NUM_TEST_SHARDS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e SKIP_SCCACHE_INITIALIZATION=1 \
            -e REENABLED_ISSUES \
            -e TORCH_CUDA_ARCH_LIST \
            -e OUR_GITHUB_JOB_ID \
            -e CUDA_VERSION \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --shm-size="1g" \
            --tty \
            --detach \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'
          echo "container_id=${container_name}" >> "${GITHUB_ENV}"

      - name: Test
        id: test
        # Time out the test phase after 3.5 hours
        timeout-minutes: 120
        run: |
          docker exec -t "${container_id}" sh -c '.ci/pytorch/test.sh && cp -Lr ./bazel-testlogs ./test/test-reports'

      - name: Print remaining test logs
        shell: bash
        if: always() && steps.test.conclusion
        run: |
          cat test/**/*_toprint.log || true

      - name: Chown workspace
        uses: ./.github/actions/chown-workspace
        if: always()

      - name: Upload test artifacts
        uses: ./.github/actions/upload-test-artifacts
        if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
        with:
          file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}

      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
        if: always()