1#!/bin/sh 2# Copyright 2019 Google LLC 3# 4# This source code is licensed under the BSD-style license found in the 5# LICENSE file in the root directory of this source tree. 6 7#################################### Scalar ################################### 8### Generic C micro-kernels 9tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x4-scalar.c & 10tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/2x4-scalar.c & 11tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2-scalar.c & 12tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x4-scalar.c & 13 14tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x4-relu-scalar.c & 15tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/2x4-relu-scalar.c & 16tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2-relu-scalar.c & 17tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x4-relu-scalar.c & 18 19tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x4-minmax-scalar.c & 20tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/2x4-minmax-scalar.c & 21tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2-minmax-scalar.c & 22tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x4-minmax-scalar.c & 23 24### WAsm-specific micro-kernels 25tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x4-wasm.c & 26tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/2x4-wasm.c & 27tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2-wasm.c & 28tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x4-wasm.c & 29 30tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x4-relu-wasm.c & 31tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/2x4-relu-wasm.c & 32tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2-relu-wasm.c & 33tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x4-relu-wasm.c & 34 35tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x4-minmax-wasm.c & 36tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/2x4-minmax-wasm.c & 37tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2-minmax-wasm.c & 38tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x4-minmax-wasm.c & 39 40################################## WAsm SIMD ################################## 41### LOAD1+BROADCAST micro-kernels 42tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-arm-loadsplat.c & 43tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-arm-loadsplat.c & 44tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-arm-loadsplat.c & 45tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-arm-loadsplat.c & 46tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-arm-loadsplat.c & 47 48tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-x86-loadsplat.c & 49tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-x86-loadsplat.c & 50tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-x86-loadsplat.c & 51tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-x86-loadsplat.c & 52tools/xngen src/f32-igemm/wasmsimd-loadsplat.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-x86-loadsplat.c & 53 54### LOAD4+DUPLICATE micro-kernels 55tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-arm-splat.c & 56tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-arm-splat.c & 57tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-arm-splat.c & 58tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-arm-splat.c & 59tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-arm-splat.c & 60 61tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/1x8-relu-wasmsimd-splat.c & 62tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x8-relu-wasmsimd-splat.c & 63tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/5x8-relu-wasmsimd-splat.c & 64 65tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/1x8-wasmsimd-splat.c & 66tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x8-wasmsimd-splat.c & 67tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/5x8-wasmsimd-splat.c & 68 69tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8-minmax-wasmsimd-x86-splat.c & 70tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8-minmax-wasmsimd-x86-splat.c & 71tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8-minmax-wasmsimd-x86-splat.c & 72tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8-minmax-wasmsimd-x86-splat.c & 73tools/xngen src/f32-igemm/wasmsimd-splat.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8-minmax-wasmsimd-x86-splat.c & 74 75### LOAD4+PERMUTE micro-kernels 76tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8s4-minmax-wasmsimd-arm.c & 77tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8s4-minmax-wasmsimd-arm.c & 78tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8s4-minmax-wasmsimd-arm.c & 79tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8s4-minmax-wasmsimd-arm.c & 80tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8s4-minmax-wasmsimd-arm.c & 81 82tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=1 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/1x8s4-minmax-wasmsimd-x86.c & 83tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=3 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/3x8s4-minmax-wasmsimd-x86.c & 84tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=4 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x8s4-minmax-wasmsimd-x86.c & 85tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=5 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/5x8s4-minmax-wasmsimd-x86.c & 86tools/xngen src/f32-igemm/wasmsimd-s4.c.in -D MR=6 -D NR=8 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/6x8s4-minmax-wasmsimd-x86.c & 87 88### MRx2 micro-kernels 89tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2c4-minmax-wasmsimd-arm.c & 90tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=1 -D ACTIVATION=MINMAX -o src/f32-igemm/gen/4x2c4-minmax-wasmsimd-x86.c & 91tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=RELU -o src/f32-igemm/gen/4x2c4-relu-wasmsimd.c & 92tools/xngen src/f32-igemm/MRx2c4-wasmsimd.c.in -D MR=4 -D NR=2 -D X86=0 -D ACTIVATION=LINEAR -o src/f32-igemm/gen/4x2c4-wasmsimd.c & 93 94############################### AArch64 assembly ############################## 95### LD64 micro-kernels 96tools/xngen src/f32-igemm/4x8-aarch64-neonfma-ld64.S.in -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-ld64.S & 97tools/xngen src/f32-igemm/6x8-aarch64-neonfma-ld64.S.in -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-ld64.S & 98 99### LD128 micro-kernels 100tools/xngen src/f32-igemm/4x8-aarch64-neonfma-ld128.S.in -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-ld128.S & 101tools/xngen src/f32-igemm/6x8-aarch64-neonfma-ld128.S.in -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-ld128.S & 102 103### Cortex A75 micro-kernels 104tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-cortex-a75.S & 105tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/1x8-minmax-aarch64-neonfma-prfm-cortex-a75.S & 106tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-cortex-a75.S & 107tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch64-neonfma-prfm-cortex-a75.S & 108tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-cortex-a75.S & 109tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/5x8-minmax-aarch64-neonfma-prfm-cortex-a75.S & 110tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-cortex-a75.S & 111tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-prfm-cortex-a75.S & 112 113############################### AArch32 assembly ############################## 114tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S & 115tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-prfm-cortex-a75.S & 116tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-cortex-a7.S.in -D PREFETCH=1 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-cortex-a7.S & 117tools/xngen src/f32-igemm/4x8-minmax-aarch32-neon-ld64.S.in -D PREFETCH=0 -o src/f32-igemm/gen/4x8-minmax-aarch32-neon-ld64.S & 118 119################################### ARM NEON ################################## 120### LD64 micro-kernels 121tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/1x8-minmax-neon-lane-ld64.c & 122tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/1x8-minmax-neonfma-lane-ld64.c & 123tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x4-minmax-neon-lane-ld64.c & 124tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=4 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x4-minmax-neonfma-lane-ld64.c & 125tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neon-lane-ld64.c & 126tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld64.c & 127tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neon-lane-ld64.c & 128tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld64.c & 129### LD128 micro-kernels 130tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neon-lane-ld128.c & 131tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-minmax-neonfma-lane-ld128.c & 132tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neon-lane-ld128.c & 133tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-minmax-neonfma-lane-ld128.c & 134### MRx2 micro-kernels- 135tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x2-minmax-neon-lane-ld64.c & 136tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x2-minmax-neonfma-lane-ld64.c & 137### DUP LD64 micro-kernels 138tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/1x8-minmax-neon-dup-ld64.c & 139tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=1 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/1x8-minmax-neonfma-dup-ld64.c & 140tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neon-dup-ld64.c & 141tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld64.c & 142tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neon-dup-ld64.c & 143tools/xngen src/f32-igemm/neon-ld64.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld64.c & 144### DUP LD128 micro-kernels 145tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neon-dup-ld128.c & 146tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-minmax-neonfma-dup-ld128.c & 147tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neon-dup-ld128.c & 148tools/xngen src/f32-igemm/neon-ld128.c.in -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-minmax-neonfma-dup-ld128.c & 149### LOAD4+PERMUTE micro-kernels 150tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8s4-minmax-neon.c & 151tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=1 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/1x8s4-minmax-neonfma.c & 152tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8s4-minmax-neon.c & 153tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=4 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/4x8s4-minmax-neonfma.c & 154tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8s4-minmax-neon.c & 155tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=6 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/6x8s4-minmax-neonfma.c & 156tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/8x8s4-minmax-neon.c & 157tools/xngen src/f32-igemm/neon-shuffle.c.in -D MR=8 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/8x8s4-minmax-neonfma.c & 158 159################################### x86 SSE ################################### 160### LOAD1+BROADCAST micro-kernels 161tools/xngen src/f32-igemm/sse-load1.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-minmax-sse-load1.c & 162tools/xngen src/f32-igemm/sse-load1.c.in -D MR=3 -D NR=8 -o src/f32-igemm/gen/3x8-minmax-sse-load1.c & 163tools/xngen src/f32-igemm/sse-load1.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-minmax-sse-load1.c & 164tools/xngen src/f32-igemm/sse-load1.c.in -D MR=5 -D NR=8 -o src/f32-igemm/gen/5x8-minmax-sse-load1.c & 165 166### LOAD4+DUPLICATE micro-kernels 167tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/1x8-minmax-sse-dup.c & 168tools/xngen src/f32-igemm/sse-dup.c.in -D MR=3 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/3x8-minmax-sse-dup.c & 169tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/4x8-minmax-sse-dup.c & 170tools/xngen src/f32-igemm/sse-dup.c.in -D MR=5 -D NR=8 -D SSE=1 -o src/f32-igemm/gen/5x8-minmax-sse-dup.c & 171 172tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/1x8-minmax-sse2-dup.c & 173tools/xngen src/f32-igemm/sse-dup.c.in -D MR=3 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/3x8-minmax-sse2-dup.c & 174tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/4x8-minmax-sse2-dup.c & 175tools/xngen src/f32-igemm/sse-dup.c.in -D MR=5 -D NR=8 -D SSE=2 -o src/f32-igemm/gen/5x8-minmax-sse2-dup.c & 176 177### LOAD4+PERMUTE micro-kernels 178tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-minmax-sse.c & 179tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=3 -D NR=8 -o src/f32-igemm/gen/3x8s4-minmax-sse.c & 180tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-minmax-sse.c & 181tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=5 -D NR=8 -o src/f32-igemm/gen/5x8s4-minmax-sse.c & 182 183### MRx2 micro-kernels 184tools/xngen src/f32-igemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-minmax-sse.c & 185 186################################### x86 AVX ################################### 187### AVX+BROADCAST micro-kernels 188tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8-minmax-avx-broadcast.c & 189tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8-minmax-avx-broadcast.c & 190tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/5x8-minmax-avx-broadcast.c & 191tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8-minmax-avx-broadcast.c & 192tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/7x8-minmax-avx-broadcast.c & 193tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/1x16-minmax-avx-broadcast.c & 194tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/3x16-minmax-avx-broadcast.c & 195tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/4x16-minmax-avx-broadcast.c & 196tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/5x16-minmax-avx-broadcast.c & 197### FMA3+BROADCAST micro-kernels 198tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/1x8-minmax-fma3-broadcast.c & 199tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/4x8-minmax-fma3-broadcast.c & 200tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/5x8-minmax-fma3-broadcast.c & 201tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/6x8-minmax-fma3-broadcast.c & 202tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/7x8-minmax-fma3-broadcast.c & 203tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/8x8-minmax-fma3-broadcast.c & 204tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16-minmax-fma3-broadcast.c & 205tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16-minmax-fma3-broadcast.c & 206tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16-minmax-fma3-broadcast.c & 207tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16-minmax-fma3-broadcast.c & 208 209tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16s4-minmax-fma3-broadcast.c & 210tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16s4-minmax-fma3-broadcast.c & 211tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16s4-minmax-fma3-broadcast.c & 212tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16s4-minmax-fma3-broadcast.c & 213 214################################# x86 AVX-512 ################################# 215### AVX512F+BROADCAST micro-kernels 216tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -o src/f32-igemm/gen/1x16-minmax-avx512f-broadcast.c & 217tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -o src/f32-igemm/gen/4x16-minmax-avx512f-broadcast.c & 218tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -o src/f32-igemm/gen/5x16-minmax-avx512f-broadcast.c & 219tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -o src/f32-igemm/gen/6x16-minmax-avx512f-broadcast.c & 220tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -o src/f32-igemm/gen/7x16-minmax-avx512f-broadcast.c & 221tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -o src/f32-igemm/gen/8x16-minmax-avx512f-broadcast.c & 222 223################################## Unit tests ################################# 224tools/generate-gemm-test.py --spec test/f32-igemm.yaml --output test/f32-igemm.cc --output test/f32-igemm-2.cc & 225tools/generate-gemm-test.py --spec test/f32-igemm-relu.yaml --output test/f32-igemm-relu.cc --output test/f32-igemm-relu-2.cc & 226tools/generate-gemm-test.py --spec test/f32-igemm-minmax.yaml --output test/f32-igemm-minmax.cc --output test/f32-igemm-minmax-2.cc & 227 228wait 229