name: android-perf on: schedule: - cron: 0 0 * * * # Note: GitHub has an upper limit of 10 inputs workflow_dispatch: inputs: models: description: Models to be benchmarked required: false type: string default: stories110M devices: description: Target devices to run benchmark required: false type: string default: samsung_galaxy_s22 delegates: description: Backend delegates required: false type: string default: xnnpack threadpool: description: Run with threadpool? required: false type: boolean default: false benchmark_configs: description: The list of configs used the benchmark required: false type: string test_spec: description: The test spec to drive the test on AWS devices required: false type: string workflow_call: inputs: models: description: Models to be benchmarked required: false type: string default: stories110M devices: description: Target devices to run benchmark required: false type: string default: samsung_galaxy_s22 delegates: description: Backend delegates required: false type: string default: xnnpack threadpool: description: Run with threadpool? required: false type: boolean default: false benchmark_configs: description: The list of configs used the benchmark required: false type: string test_spec: description: The test spec to drive the test on AWS devices required: false type: string concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} cancel-in-progress: true jobs: set-parameters: runs-on: linux.2xlarge outputs: models: ${{ steps.set-parameters.outputs.models }} devices: ${{ steps.set-parameters.outputs.devices }} delegates: ${{ steps.set-parameters.outputs.delegates }} steps: - name: Set parameters id: set-parameters shell: bash env: # Separate default values from the workflow dispatch. To ensure defaults are accessible # during scheduled runs and to provide flexibility for different defaults between # on-demand and periodic benchmarking. CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3,vit" CRON_DEFAULT_DEVICES: "samsung_galaxy_s22" CRON_DEFAULT_DELEGATES: "xnnpack,qnn" run: | set -ex MODELS="${{ inputs.models }}" if [ -z "$MODELS" ]; then MODELS="$CRON_DEFAULT_MODELS" fi DEVICES="${{ inputs.devices }}" if [ -z "$DEVICES" ]; then DEVICES="$CRON_DEFAULT_DEVICES" fi DELEGATES="${{ inputs.delegates }}" if [ -z "$DELEGATES" ]; then DELEGATES="$CRON_DEFAULT_DELEGATES" fi # Mapping devices to their corresponding device-pool-arn declare -A DEVICE_POOL_ARNS DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa" DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db" # Resolve device names with their corresponding ARNs if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then DEVICES=$(echo "$DEVICES" | jq -Rc 'split(",")') fi declare -a MAPPED_ARNS=() for DEVICE in $(echo "$DEVICES" | jq -r '.[]'); do if [[ -z "${DEVICE_POOL_ARNS[$DEVICE]}" ]]; then echo "Error: No ARN found for device '$DEVICE'. Abort." >&2 exit 1 fi MAPPED_ARNS+=("${DEVICE_POOL_ARNS[$DEVICE]}") done echo "models=$(echo $MODELS | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT MAPPED_ARNS_JSON=$(printf '%s\n' "${MAPPED_ARNS[@]}" | jq -R . | jq -s .) echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT export-models: name: export-models uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.5 needs: set-parameters strategy: matrix: model: ${{ fromJson(needs.set-parameters.outputs.models) }} delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }} fail-fast: false with: runner: linux.4xlarge docker-image: executorch-ubuntu-22.04-qnn-sdk submodules: 'true' timeout: 60 upload-artifact: android-models upload-artifact-to-s3: true script: | # The generic Linux job chooses to use base env, not the one setup by the image echo "::group::Setting up dev environment" CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" if [[ ${{ matrix.delegate }} == "qnn" ]]; then PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh fi PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }} echo "::endgroup::" echo "::group::Exporting ${{ matrix.delegate }} model: ${{ matrix.model }}" BUILD_MODE="cmake" DTYPE="fp32" if [[ ${{ matrix.model }} =~ ^stories* ]]; then # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh # Test llama2 if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then DELEGATE_CONFIG="xnnpack+custom+qe" elif [[ ${{ matrix.delegate }} == "qnn" ]]; then DELEGATE_CONFIG="qnn" else echo "Unsupported delegate ${{ matrix.delegate }}" exit 1 fi PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}" else PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}" fi echo "::endgroup::" build-benchmark-app: name: build-benchmark-app uses: pytorch/test-infra/.github/workflows/linux_job.yml@main needs: set-parameters with: runner: linux.2xlarge docker-image: executorch-ubuntu-22.04-clang12-android submodules: 'true' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 90 upload-artifact: android-apps upload-artifact-to-s3: true script: | set -eux # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh export ANDROID_ABIS="arm64-v8a" PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME} # Let's see how expensive this job is, we might want to tone it down by running it periodically benchmark-on-device: if: always() permissions: id-token: write contents: read uses: pytorch/test-infra/.github/workflows/mobile_job.yml@release/2.5 needs: - set-parameters - build-benchmark-app - export-models strategy: matrix: model: ${{ fromJson(needs.set-parameters.outputs.models) }} delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }} device: ${{ fromJson(needs.set-parameters.outputs.devices) }} fail-fast: false with: # Due to scheduling a job may be pushed beyond the default 60m threshold timeout: 120 device-type: android runner: linux.2xlarge test-infra-ref: '' # This is the ARN of ExecuTorch project on AWS project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6 device-pool-arn: ${{ matrix.device }} android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk # NB: Need to set the default spec here so that it works for periodic too test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }} # Uploaded to S3 from the previous job extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip upload-benchmark-results: needs: - benchmark-on-device if: always() runs-on: linux.2xlarge environment: upload-benchmark-results permissions: id-token: write contents: read steps: - uses: actions/checkout@v3 with: submodules: false - name: Authenticate with AWS uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results # The max duration enforced by the server side role-duration-seconds: 18000 aws-region: us-east-1 - name: Setup conda uses: pytorch/test-infra/.github/actions/setup-miniconda@main with: python-version: '3.10' - name: Download the list of artifacts from S3 env: ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/ shell: bash run: | set -eux ${CONDA_RUN} python -mpip install awscli==1.32.18 mkdir -p artifacts pushd artifacts ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" . popd ls -lah artifacts - name: Extract the benchmark results JSON shell: bash run: | set -eux mkdir -p benchmark-results for ARTIFACTS_BY_JOB in artifacts/*.json; do [ -f "${ARTIFACTS_BY_JOB}" ] || break echo "${ARTIFACTS_BY_JOB}" ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \ --artifacts "${ARTIFACTS_BY_JOB}" \ --output-dir benchmark-results \ --repo ${{ github.repository }} \ --head-branch ${{ github.head_ref || github.ref_name }} \ --workflow-name "${{ github.workflow }}" \ --workflow-run-id ${{ github.run_id }} \ --workflow-run-attempt ${{ github.run_attempt }} done ls -lah benchmark-results for BENCHMARK_RESULTS in benchmark-results/*.json; do cat "${BENCHMARK_RESULTS}" echo done - name: Upload the benchmark results uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: benchmark-results-dir: 'benchmark-results' dry-run: false