1#!/bin/sh 2# Copyright 2019 Google LLC 3# 4# This source code is licensed under the BSD-style license found in the 5# LICENSE file in the root directory of this source tree. 6 7#################################### Scalar ################################### 8### Generic C micro-kernels 9tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -o src/f32-igemm/gen/1x4-scalar.c 10tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -o src/f32-igemm/gen/2x4-scalar.c 11tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -o src/f32-igemm/gen/4x2-scalar.c 12tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -o src/f32-igemm/gen/4x4-scalar.c 13 14### WAsm-specific micro-kernels 15tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -o src/f32-igemm/gen/1x4-wasm.c 16tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -o src/f32-igemm/gen/2x4-wasm.c 17tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -o src/f32-igemm/gen/4x2-wasm.c 18tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -o src/f32-igemm/gen/4x4-wasm.c 19 20############################### AArch64 assembly ############################## 21# Cortex A75 / A57 micro-kernels 22tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a57.S 23tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a75.S 24tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a57.S 25tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a75.S 26tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a57.S 27tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a75.S 28tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a57.S 29tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a75.S 30 31############################### AArch32 assembly ############################## 32tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/4x8-aarch32-neon-cortex-a75.S 33tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/4x8-aarch32-neon-pld-cortex-a75.S 34 35################################### ARM NEON ################################## 36### LD64 micro-kernels 37tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/1x8-neon-lane-ld64.c 38tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/1x8-neonfma-lane-ld64.c 39tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x4-neon-lane-ld64.c 40tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x4-neonfma-lane-ld64.c 41tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-neon-lane-ld64.c 42tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-neonfma-lane-ld64.c 43tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-neon-lane-ld64.c 44tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-neonfma-lane-ld64.c 45### LD128 micro-kernels 46tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-neon-lane-ld128.c 47tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-neonfma-lane-ld128.c 48tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-neon-lane-ld128.c 49tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-neonfma-lane-ld128.c 50### MRx2 micro-kernels- 51tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x2-neon-lane-ld64.c 52tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x2-neonfma-lane-ld64.c 53### DUP LD64 micro-kernels 54tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/1x8-neon-dup-ld64.c 55tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/1x8-neonfma-dup-ld64.c 56tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-neon-dup-ld64.c 57tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-neonfma-dup-ld64.c 58tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-neon-dup-ld64.c 59tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-neonfma-dup-ld64.c 60### DUP LD128 micro-kernels 61tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-neon-dup-ld128.c 62tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-neonfma-dup-ld128.c 63tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-neon-dup-ld128.c 64tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-neonfma-dup-ld128.c 65### LOAD4+PERMUTE micro-kernels 66tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8s4-neon.c 67tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/1x8s4-neonfma.c 68tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8s4-neon.c 69tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/4x8s4-neonfma.c 70tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8s4-neon.c 71tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/6x8s4-neonfma.c 72tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/8x8s4-neon.c 73tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/8x8s4-neonfma.c 74 75#################################### PSIMD #################################### 76### LOAD1+BROADCAST micro-kernels 77tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-psimd-loadsplat.c 78tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-psimd-loadsplat.c 79tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -o src/f32-igemm/gen/6x8-psimd-loadsplat.c 80### LOAD4+DUPLICATE micro-kernels 81tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-psimd-splat.c 82tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-psimd-splat.c 83tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=6 -D NR=8 -o src/f32-igemm/gen/6x8-psimd-splat.c 84### LOAD4+PERMUTE micro-kernels 85tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-psimd.c 86tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-psimd.c 87tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=6 -D NR=8 -o src/f32-igemm/gen/6x8s4-psimd.c 88### MRx2 micro-kernels 89tools/xngen src/f32-igemm/MRx2c4-psimd.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-psimd.c 90 91################################### x86 SSE ################################### 92### LOAD1+BROADCAST micro-kernels 93tools/xngen src/f32-igemm/sse-load1.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-sse-load1.c 94tools/xngen src/f32-igemm/sse-load1.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-sse-load1.c 95### LOAD4+DUPLICATE micro-kernels 96tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-sse-dup.c 97tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-sse-dup.c 98### LOAD4+PERMUTE micro-kernels 99tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-sse.c 100tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-sse.c 101### MRx2 micro-kernels 102tools/xngen src/f32-igemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-sse.c 103 104################################### x86 AVX ################################### 105### AVX+BROADCAST micro-kernels 106tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8-avx-broadcast.c 107tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8-avx-broadcast.c 108tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/5x8-avx-broadcast.c 109tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8-avx-broadcast.c 110tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/7x8-avx-broadcast.c 111tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/1x16-avx-broadcast.c 112tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/3x16-avx-broadcast.c 113tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/4x16-avx-broadcast.c 114tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/5x16-avx-broadcast.c 115### FMA3+BROADCAST micro-kernels 116tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/1x8-fma3-broadcast.c 117tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/4x8-fma3-broadcast.c 118tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/5x8-fma3-broadcast.c 119tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/6x8-fma3-broadcast.c 120tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/7x8-fma3-broadcast.c 121tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/8x8-fma3-broadcast.c 122tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16-fma3-broadcast.c 123tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16-fma3-broadcast.c 124tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16-fma3-broadcast.c 125tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16-fma3-broadcast.c 126 127tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16s4-fma3-broadcast.c 128tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16s4-fma3-broadcast.c 129tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16s4-fma3-broadcast.c 130tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16s4-fma3-broadcast.c 131 132################################# x86 AVX-512 ################################# 133### AVX512F+BROADCAST micro-kernels 134tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -o src/f32-igemm/gen/1x16-avx512f-broadcast.c 135tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -o src/f32-igemm/gen/4x16-avx512f-broadcast.c 136tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -o src/f32-igemm/gen/5x16-avx512f-broadcast.c 137tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -o src/f32-igemm/gen/6x16-avx512f-broadcast.c 138tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -o src/f32-igemm/gen/7x16-avx512f-broadcast.c 139tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -o src/f32-igemm/gen/8x16-avx512f-broadcast.c 140 141################################## Unit tests ################################# 142tools/generate-gemm-test.py --spec test/f32-igemm.yaml --output test/f32-igemm.cc 143