name: apple-perf

on:
  schedule:
    - cron: '0 1 * * *'
  # Note: GitHub has an upper limit of 10 inputs
  workflow_dispatch:
    inputs:
      models:
        description: Models to be benchmarked
        required: false
        type: string
        default: stories110M
      devices:
        description: Target devices to run benchmark
        required: false
        type: string
        default: apple_iphone_15
      delegates:
        description: Backend delegates
        required: false
        type: string
        default: xnnpack
      benchmark_configs:
        description: The list of configs used the benchmark
        required: false
        type: string
      test_spec:
        description: The test spec to drive the test on AWS devices
        required: false
        type: string
  workflow_call:
    inputs:
      models:
        description: Models to be benchmarked
        required: false
        type: string
        default: stories110M
      devices:
        description: Target devices to run benchmark
        required: false
        type: string
        default: apple_iphone_15
      delegates:
        description: Backend delegates
        required: false
        type: string
        default: xnnpack
      benchmark_configs:
        description: The list of configs used the benchmark
        required: false
        type: string
      test_spec:
        description: The test spec to drive the test on AWS devices
        required: false
        type: string

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  set-parameters:
    runs-on: linux.2xlarge
    outputs:
      models: ${{ steps.set-parameters.outputs.models }}
      devices: ${{ steps.set-parameters.outputs.devices }}
      delegates: ${{ steps.set-parameters.outputs.delegates }}
    steps:
      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          # Separate default values from the workflow dispatch to ensure defaults are accessible
          # during scheduled runs and to provide flexibility for different defaults between
          # on-demand and periodic benchmarking.
          CRON_DEFAULT_MODELS: "stories110M,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l"
          CRON_DEFAULT_DEVICES: "apple_iphone_15"
          CRON_DEFAULT_DELEGATES: "xnnpack,coreml,mps"
        run: |
          set -ex
          MODELS="${{ inputs.models }}"
          if [ -z "$MODELS" ]; then
            MODELS="$CRON_DEFAULT_MODELS"
          fi
          DEVICES="${{ inputs.devices }}"
          if [ -z "$DEVICES" ]; then
            DEVICES="$CRON_DEFAULT_DEVICES"
          fi
          DELEGATES="${{ inputs.delegates }}"
          if [ -z "$DELEGATES" ]; then
            DELEGATES="$CRON_DEFAULT_DELEGATES"
          fi

          # Mapping devices to their corresponding device-pool-arn
          declare -A DEVICE_POOL_ARNS
          DEVICE_POOL_ARNS[apple_iphone_15]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d"

          # Resolve device names with their corresponding ARNs.
          # NB: Check jq's exit status, not its stdout: `jq empty` prints nothing on
          # success, so capturing stdout would make this condition always true and
          # re-split input that is already a JSON array.
          if ! echo "$DEVICES" | jq empty 2>/dev/null; then
            # Not valid JSON, treat as a comma-separated list and convert it
            DEVICES=$(echo "$DEVICES" | jq -Rc 'split(",")')
          fi
          declare -a MAPPED_ARNS=()
          for DEVICE in $(echo "$DEVICES" | jq -r '.[]'); do
            if [[ -z "${DEVICE_POOL_ARNS[$DEVICE]}" ]]; then
              echo "Error: No ARN found for device '$DEVICE'. Abort." >&2
              exit 1
            fi
            MAPPED_ARNS+=("${DEVICE_POOL_ARNS[$DEVICE]}")
          done

          echo "models=$(echo $MODELS | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT
          MAPPED_ARNS_JSON=$(printf '%s\n' "${MAPPED_ARNS[@]}" | jq -R . | jq -s .)
          echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
          echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT

  export-models:
    name: export-models
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.5
    needs: set-parameters
    strategy:
      matrix:
        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
      fail-fast: false
    with:
      # NB: Need to use our AWS MacOS runner to upload large models to S3
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      timeout: 60
      upload-artifact: ios-models
      upload-artifact-to-s3: true
      script: |
        set -eux

        echo "::group::Setting up CI environment"
        .ci/scripts/setup-conda.sh

        BUILD_TOOL=cmake
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

        if [[ ${{ matrix.delegate }} == "coreml" ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            backends/apple/coreml/scripts/install_requirements.sh
        fi

        if [[ ${{ matrix.delegate }} == "mps" ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            backends/apple/mps/install_requirements.sh
        fi

        ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
        echo "::endgroup::"

        echo "::group::Exporting ${{ matrix.delegate }} model: ${{ matrix.model }}"
        BUILD_MODE="cmake"
        DTYPE="fp32"

        if [[ ${{ matrix.model }} =~ ^stories* ]]; then
          # Install requirements for export_llama
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash examples/models/llama/install_requirements.sh

          # Test llama2
          if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
            DELEGATE_CONFIG="xnnpack+custom+qe"
          elif [[ ${{ matrix.delegate }} == "coreml" ]]; then
            DELEGATE_CONFIG="coreml"
          elif [[ ${{ matrix.delegate }} == "mps" ]]; then
            DELEGATE_CONFIG="mps"
          fi
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
        else
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
        fi
        echo "::endgroup::"

  build-benchmark-app:
    name: build-benchmark-app
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.5
    needs:
      - set-parameters
    secrets: inherit
    with:
      runner: macos-latest-xlarge
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      upload-artifact: ios-apps
      secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
      timeout: 90
      script: |
        set -eux

        echo "::group::Setting up CI environment"
        .ci/scripts/setup-conda.sh

        BUILD_TOOL=cmake
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

        # Setup Apple certificate for iOS development
        BUILD_PROVISION_PROFILE_BASE64="${SECRET_EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64}" \
        BUILD_CERTIFICATE_BASE64="${SECRET_BUILD_CERTIFICATE_BASE64}" \
        KEYCHAIN_PASSWORD="${SECRET_KEYCHAIN_PASSWORD}" \
        .ci/scripts/setup-ios.sh

        # Install CoreML Backend Requirements
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          backends/apple/coreml/scripts/install_requirements.sh

        # Install MPS Backend Requirements
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          backends/apple/mps/install_requirements.sh
        echo "::endgroup::"

        echo "::group::Build ExecuTorch iOS frameworks"
        FRAMEWORKS=(
          "executorch"
          "backend_coreml"
          "backend_mps"
          "backend_xnnpack"
          "kernels_custom"
          "kernels_optimized"
          "kernels_portable"
          "kernels_quantized"
        )

        # Build Release iOS Frameworks
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack

        mkdir -p extension/benchmark/apple/Benchmark/Frameworks
        for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
          cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/benchmark/apple/Benchmark/Frameworks/
        ) done
        echo "::endgroup::"

        # NB: Although exported models can be copied to this directory and bundled together with the
        # app, we don't use this in CI and rely on AWS extra data parameter to make the model and the
        # tokenizer available to the benchmark. This decouples the app and the model. We just need to
        # create the directory here to pass the build
        mkdir -p extension/benchmark/apple/Benchmark/Models
        ${CONDA_RUN} --no-capture-output \
          build/build_apple_llm_demo.sh ${ARTIFACTS_DIR_NAME}

  upload-benchmark-app:
    needs: build-benchmark-app
    runs-on: linux.2xlarge
    steps:
      - name: Download the apps from GitHub
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: ios-apps
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the apps
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the apps to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifacts
          retention-days: 14
          if-no-files-found: ignore
          path: ${{ runner.temp }}/artifacts/

  benchmark-on-device:
    if: always()
    needs:
      - set-parameters
      - upload-benchmark-app
      - export-models
    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@release/2.5
    strategy:
      matrix:
        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
        device: ${{ fromJson(needs.set-parameters.outputs.devices) }}
      fail-fast: false
    with:
      # Due to scheduling a job may be pushed beyond the default 60m threshold
      timeout: 120
      device-type: ios
      # For iOS testing, the runner just needs to call AWS Device Farm, so there is no need to run this on macOS
      runner: linux.2xlarge
      test-infra-ref: ''
      # This is the ARN of ExecuTorch project on AWS
      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
      device-pool-arn: ${{ matrix.device }}
      # Uploaded to S3 from the previous job
      ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa
      ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
      test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

  upload-benchmark-results:
    needs:
      - benchmark-on-device
    if: always()
    runs-on: linux.2xlarge
    environment: upload-benchmark-results
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: false

      - name: Authenticate with AWS
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Setup conda
        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
        with:
          python-version: '3.10'

      - name: Download the list of artifacts from S3
        env:
          ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
        shell: bash
        run: |
          set -eux
          ${CONDA_RUN} python -mpip install awscli==1.32.18

          mkdir -p artifacts
          pushd artifacts
          ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
          popd

          ls -lah artifacts

      - name: Extract the benchmark results JSON
        shell: bash
        run: |
          set -eux

          mkdir -p benchmark-results

          for ARTIFACTS_BY_JOB in artifacts/*.json; do
            [ -f "${ARTIFACTS_BY_JOB}" ] || break
            echo "${ARTIFACTS_BY_JOB}"
            ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
              --artifacts "${ARTIFACTS_BY_JOB}" \
              --output-dir benchmark-results \
              --repo ${{ github.repository }} \
              --head-branch ${{ github.head_ref || github.ref_name }} \
              --workflow-name "${{ github.workflow }}" \
              --workflow-run-id ${{ github.run_id }} \
              --workflow-run-attempt ${{ github.run_attempt }}
          done

          ls -lah benchmark-results

          for BENCHMARK_RESULTS in benchmark-results/*.json; do
            cat "${BENCHMARK_RESULTS}"
            echo
          done

      - name: Upload the benchmark results
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: 'benchmark-results'
          dry-run: false