1name: android-perf 2 3on: 4 schedule: 5 - cron: 0 0 * * * 6 # Note: GitHub has an upper limit of 10 inputs 7 workflow_dispatch: 8 inputs: 9 models: 10 description: Models to be benchmarked 11 required: false 12 type: string 13 default: stories110M 14 devices: 15 description: Target devices to run benchmark 16 required: false 17 type: string 18 default: samsung_galaxy_s22 19 delegates: 20 description: Backend delegates 21 required: false 22 type: string 23 default: xnnpack 24 threadpool: 25 description: Run with threadpool? 26 required: false 27 type: boolean 28 default: false 29 benchmark_configs: 30 description: The list of configs used the benchmark 31 required: false 32 type: string 33 test_spec: 34 description: The test spec to drive the test on AWS devices 35 required: false 36 type: string 37 workflow_call: 38 inputs: 39 models: 40 description: Models to be benchmarked 41 required: false 42 type: string 43 default: stories110M 44 devices: 45 description: Target devices to run benchmark 46 required: false 47 type: string 48 default: samsung_galaxy_s22 49 delegates: 50 description: Backend delegates 51 required: false 52 type: string 53 default: xnnpack 54 threadpool: 55 description: Run with threadpool? 56 required: false 57 type: boolean 58 default: false 59 benchmark_configs: 60 description: The list of configs used the benchmark 61 required: false 62 type: string 63 test_spec: 64 description: The test spec to drive the test on AWS devices 65 required: false 66 type: string 67 68concurrency: 69 group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} 70 cancel-in-progress: true 71 72jobs: 73 set-parameters: 74 runs-on: linux.2xlarge 75 outputs: 76 models: ${{ steps.set-parameters.outputs.models }} 77 devices: ${{ steps.set-parameters.outputs.devices }} 78 delegates: ${{ steps.set-parameters.outputs.delegates }} 79 steps: 80 - name: Set parameters 81 id: set-parameters 82 shell: bash 83 env: 84 # Separate default values from the workflow dispatch. To ensure defaults are accessible 85 # during scheduled runs and to provide flexibility for different defaults between 86 # on-demand and periodic benchmarking. 87 CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3,vit" 88 CRON_DEFAULT_DEVICES: "samsung_galaxy_s22" 89 CRON_DEFAULT_DELEGATES: "xnnpack,qnn" 90 run: | 91 set -ex 92 MODELS="${{ inputs.models }}" 93 if [ -z "$MODELS" ]; then 94 MODELS="$CRON_DEFAULT_MODELS" 95 fi 96 DEVICES="${{ inputs.devices }}" 97 if [ -z "$DEVICES" ]; then 98 DEVICES="$CRON_DEFAULT_DEVICES" 99 fi 100 DELEGATES="${{ inputs.delegates }}" 101 if [ -z "$DELEGATES" ]; then 102 DELEGATES="$CRON_DEFAULT_DELEGATES" 103 fi 104 105 # Mapping devices to their corresponding device-pool-arn 106 declare -A DEVICE_POOL_ARNS 107 DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa" 108 DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db" 109 110 # Resolve device names with their corresponding ARNs 111 if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then 112 DEVICES=$(echo "$DEVICES" | jq -Rc 'split(",")') 113 fi 114 declare -a MAPPED_ARNS=() 115 for DEVICE in $(echo "$DEVICES" | jq -r '.[]'); do 116 if [[ -z "${DEVICE_POOL_ARNS[$DEVICE]}" ]]; then 117 echo "Error: No ARN found for device '$DEVICE'. Abort." >&2 118 exit 1 119 fi 120 MAPPED_ARNS+=("${DEVICE_POOL_ARNS[$DEVICE]}") 121 done 122 123 echo "models=$(echo $MODELS | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT 124 MAPPED_ARNS_JSON=$(printf '%s\n' "${MAPPED_ARNS[@]}" | jq -R . | jq -s .) 125 echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT 126 echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT 127 128 export-models: 129 name: export-models 130 uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.5 131 needs: set-parameters 132 strategy: 133 matrix: 134 model: ${{ fromJson(needs.set-parameters.outputs.models) }} 135 delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }} 136 fail-fast: false 137 with: 138 runner: linux.4xlarge 139 docker-image: executorch-ubuntu-22.04-qnn-sdk 140 submodules: 'true' 141 timeout: 60 142 upload-artifact: android-models 143 upload-artifact-to-s3: true 144 script: | 145 # The generic Linux job chooses to use base env, not the one setup by the image 146 echo "::group::Setting up dev environment" 147 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") 148 conda activate "${CONDA_ENV}" 149 if [[ ${{ matrix.delegate }} == "qnn" ]]; then 150 PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh 151 PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh 152 fi 153 PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake" 154 ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }} 155 echo "::endgroup::" 156 157 echo "::group::Exporting ${{ matrix.delegate }} model: ${{ matrix.model }}" 158 BUILD_MODE="cmake" 159 DTYPE="fp32" 160 161 if [[ ${{ matrix.model }} =~ ^stories* ]]; then 162 # Install requirements for export_llama 163 PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh 164 # Test llama2 165 if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then 166 DELEGATE_CONFIG="xnnpack+custom+qe" 167 elif [[ ${{ matrix.delegate }} == "qnn" ]]; then 168 DELEGATE_CONFIG="qnn" 169 else 170 echo "Unsupported delegate ${{ matrix.delegate }}" 171 exit 1 172 fi 173 PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}" 174 else 175 PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}" 176 fi 177 echo "::endgroup::" 178 179 build-benchmark-app: 180 name: build-benchmark-app 181 uses: pytorch/test-infra/.github/workflows/linux_job.yml@main 182 needs: set-parameters 183 with: 184 runner: linux.2xlarge 185 docker-image: executorch-ubuntu-22.04-clang12-android 186 submodules: 'true' 187 ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} 188 timeout: 90 189 upload-artifact: android-apps 190 upload-artifact-to-s3: true 191 script: | 192 set -eux 193 194 # The generic Linux job chooses to use base env, not the one setup by the image 195 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") 196 conda activate "${CONDA_ENV}" 197 PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake 198 export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded 199 200 PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh 201 PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh 202 203 export ANDROID_ABIS="arm64-v8a" 204 PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME} 205 206 # Let's see how expensive this job is, we might want to tone it down by running it periodically 207 benchmark-on-device: 208 if: always() 209 permissions: 210 id-token: write 211 contents: read 212 uses: pytorch/test-infra/.github/workflows/mobile_job.yml@release/2.5 213 needs: 214 - set-parameters 215 - build-benchmark-app 216 - export-models 217 strategy: 218 matrix: 219 model: ${{ fromJson(needs.set-parameters.outputs.models) }} 220 delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }} 221 device: ${{ fromJson(needs.set-parameters.outputs.devices) }} 222 fail-fast: false 223 with: 224 # Due to scheduling a job may be pushed beyond the default 60m threshold 225 timeout: 120 226 device-type: android 227 runner: linux.2xlarge 228 test-infra-ref: '' 229 # This is the ARN of ExecuTorch project on AWS 230 project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6 231 device-pool-arn: ${{ matrix.device }} 232 android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk 233 android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk 234 # NB: Need to set the default spec here so that it works for periodic too 235 test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }} 236 # Uploaded to S3 from the previous job 237 extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip 238 239 upload-benchmark-results: 240 needs: 241 - benchmark-on-device 242 if: always() 243 runs-on: linux.2xlarge 244 environment: upload-benchmark-results 245 permissions: 246 id-token: write 247 contents: read 248 steps: 249 - uses: actions/checkout@v3 250 with: 251 submodules: false 252 253 - name: Authenticate with AWS 254 uses: aws-actions/configure-aws-credentials@v4 255 with: 256 role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results 257 # The max duration enforced by the server side 258 role-duration-seconds: 18000 259 aws-region: us-east-1 260 261 - name: Setup conda 262 uses: pytorch/test-infra/.github/actions/setup-miniconda@main 263 with: 264 python-version: '3.10' 265 266 - name: Download the list of artifacts from S3 267 env: 268 ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/ 269 shell: bash 270 run: | 271 set -eux 272 ${CONDA_RUN} python -mpip install awscli==1.32.18 273 274 mkdir -p artifacts 275 pushd artifacts 276 ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" . 277 popd 278 279 ls -lah artifacts 280 281 - name: Extract the benchmark results JSON 282 shell: bash 283 run: | 284 set -eux 285 286 mkdir -p benchmark-results 287 288 for ARTIFACTS_BY_JOB in artifacts/*.json; do 289 [ -f "${ARTIFACTS_BY_JOB}" ] || break 290 echo "${ARTIFACTS_BY_JOB}" 291 ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \ 292 --artifacts "${ARTIFACTS_BY_JOB}" \ 293 --output-dir benchmark-results \ 294 --repo ${{ github.repository }} \ 295 --head-branch ${{ github.head_ref || github.ref_name }} \ 296 --workflow-name "${{ github.workflow }}" \ 297 --workflow-run-id ${{ github.run_id }} \ 298 --workflow-run-attempt ${{ github.run_attempt }} 299 done 300 301 ls -lah benchmark-results 302 303 for BENCHMARK_RESULTS in benchmark-results/*.json; do 304 cat "${BENCHMARK_RESULTS}" 305 echo 306 done 307 308 - name: Upload the benchmark results 309 uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main 310 with: 311 benchmark-results-dir: 'benchmark-results' 312 dry-run: false 313