• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/bin/sh
2# Copyright 2019 Google LLC
3#
4# This source code is licensed under the BSD-style license found in the
5# LICENSE file in the root directory of this source tree.
6
7#################################### Scalar ###################################
8### Generic C micro-kernels
9tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -o src/f32-igemm/gen/1x4-scalar.c
10tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -o src/f32-igemm/gen/2x4-scalar.c
11tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -o src/f32-igemm/gen/4x2-scalar.c
12tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -o src/f32-igemm/gen/4x4-scalar.c
13
14### WAsm-specific micro-kernels
15tools/xngen src/f32-igemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -o src/f32-igemm/gen/1x4-wasm.c
16tools/xngen src/f32-igemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -o src/f32-igemm/gen/2x4-wasm.c
17tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=1 -o src/f32-igemm/gen/4x2-wasm.c
18tools/xngen src/f32-igemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=1 -o src/f32-igemm/gen/4x4-wasm.c
19
20############################### AArch64 assembly ##############################
21# Cortex A75 / A57 micro-kernels
22tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in  -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a57.S
23tools/xngen src/f32-igemm/1x8-aarch64-neonfma-cortex-a75.S.in  -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/1x8-aarch64-neonfma-cortex-a75.S
24tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in  -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a57.S
25tools/xngen src/f32-igemm/4x8-aarch64-neonfma-cortex-a75.S.in  -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/4x8-aarch64-neonfma-cortex-a75.S
26tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in  -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a57.S
27tools/xngen src/f32-igemm/5x8-aarch64-neonfma-cortex-a75.S.in  -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/5x8-aarch64-neonfma-cortex-a75.S
28tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in  -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a57.S
29tools/xngen src/f32-igemm/6x8-aarch64-neonfma-cortex-a75.S.in  -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/6x8-aarch64-neonfma-cortex-a75.S
30
31############################### AArch32 assembly ##############################
32tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in  -D INC=0 -D PREFETCH=0 -o src/f32-igemm/gen/4x8-aarch32-neon-cortex-a75.S
33tools/xngen src/f32-igemm/4x8-aarch32-neon-cortex-a75.S.in  -D INC=0 -D PREFETCH=1 -o src/f32-igemm/gen/4x8-aarch32-neon-pld-cortex-a75.S
34
35################################### ARM NEON ##################################
36### LD64 micro-kernels
37tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=1 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/1x8-neon-lane-ld64.c
38tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=1 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/1x8-neonfma-lane-ld64.c
39tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=4 -D NR=4 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x4-neon-lane-ld64.c
40tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=4 -D NR=4 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x4-neonfma-lane-ld64.c
41tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-neon-lane-ld64.c
42tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-neonfma-lane-ld64.c
43tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-neon-lane-ld64.c
44tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-neonfma-lane-ld64.c
45### LD128 micro-kernels
46tools/xngen src/f32-igemm/neon-ld128.c.in     -D MR=4 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x8-neon-lane-ld128.c
47tools/xngen src/f32-igemm/neon-ld128.c.in     -D MR=4 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x8-neonfma-lane-ld128.c
48tools/xngen src/f32-igemm/neon-ld128.c.in     -D MR=6 -D NR=8 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/6x8-neon-lane-ld128.c
49tools/xngen src/f32-igemm/neon-ld128.c.in     -D MR=6 -D NR=8 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/6x8-neonfma-lane-ld128.c
50### MRx2 micro-kernels-
51tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=0 -D DUP=0 -o src/f32-igemm/gen/4x2-neon-lane-ld64.c
52tools/xngen src/f32-igemm/MRx2-neon-ld64.c.in -D MR=4 -D NR=2 -D FMA=1 -D DUP=0 -o src/f32-igemm/gen/4x2-neonfma-lane-ld64.c
53### DUP LD64 micro-kernels
54tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=1 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/1x8-neon-dup-ld64.c
55tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=1 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/1x8-neonfma-dup-ld64.c
56tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-neon-dup-ld64.c
57tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-neonfma-dup-ld64.c
58tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-neon-dup-ld64.c
59tools/xngen src/f32-igemm/neon-ld64.c.in      -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-neonfma-dup-ld64.c
60### DUP LD128 micro-kernels
61tools/xngen src/f32-igemm/neon-ld128.c.in     -D MR=4 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/4x8-neon-dup-ld128.c
62tools/xngen src/f32-igemm/neon-ld128.c.in     -D MR=4 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/4x8-neonfma-dup-ld128.c
63tools/xngen src/f32-igemm/neon-ld128.c.in     -D MR=6 -D NR=8 -D FMA=0 -D DUP=1 -o src/f32-igemm/gen/6x8-neon-dup-ld128.c
64tools/xngen src/f32-igemm/neon-ld128.c.in     -D MR=6 -D NR=8 -D FMA=1 -D DUP=1 -o src/f32-igemm/gen/6x8-neonfma-dup-ld128.c
65### LOAD4+PERMUTE micro-kernels
66tools/xngen src/f32-igemm/neon-shuffle.c.in   -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8s4-neon.c
67tools/xngen src/f32-igemm/neon-shuffle.c.in   -D MR=1 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/1x8s4-neonfma.c
68tools/xngen src/f32-igemm/neon-shuffle.c.in   -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8s4-neon.c
69tools/xngen src/f32-igemm/neon-shuffle.c.in   -D MR=4 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/4x8s4-neonfma.c
70tools/xngen src/f32-igemm/neon-shuffle.c.in   -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8s4-neon.c
71tools/xngen src/f32-igemm/neon-shuffle.c.in   -D MR=6 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/6x8s4-neonfma.c
72tools/xngen src/f32-igemm/neon-shuffle.c.in   -D MR=8 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/8x8s4-neon.c
73tools/xngen src/f32-igemm/neon-shuffle.c.in   -D MR=8 -D NR=8 -D FMA=1 -o src/f32-igemm/gen/8x8s4-neonfma.c
74
75#################################### PSIMD ####################################
76### LOAD1+BROADCAST micro-kernels
77tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-psimd-loadsplat.c
78tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-psimd-loadsplat.c
79tools/xngen src/f32-igemm/psimd-loadsplat.c.in -D MR=6 -D NR=8 -o src/f32-igemm/gen/6x8-psimd-loadsplat.c
80### LOAD4+DUPLICATE micro-kernels
81tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-psimd-splat.c
82tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-psimd-splat.c
83tools/xngen src/f32-igemm/psimd-splat.c.in -D MR=6 -D NR=8 -o src/f32-igemm/gen/6x8-psimd-splat.c
84### LOAD4+PERMUTE micro-kernels
85tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-psimd.c
86tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-psimd.c
87tools/xngen src/f32-igemm/psimd-s4.c.in -D MR=6 -D NR=8 -o src/f32-igemm/gen/6x8s4-psimd.c
88### MRx2 micro-kernels
89tools/xngen src/f32-igemm/MRx2c4-psimd.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-psimd.c
90
91################################### x86 SSE ###################################
92### LOAD1+BROADCAST micro-kernels
93tools/xngen src/f32-igemm/sse-load1.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-sse-load1.c
94tools/xngen src/f32-igemm/sse-load1.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-sse-load1.c
95### LOAD4+DUPLICATE micro-kernels
96tools/xngen src/f32-igemm/sse-dup.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8-sse-dup.c
97tools/xngen src/f32-igemm/sse-dup.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8-sse-dup.c
98### LOAD4+PERMUTE micro-kernels
99tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=1 -D NR=8 -o src/f32-igemm/gen/1x8s4-sse.c
100tools/xngen src/f32-igemm/sse-shuffle.c.in -D MR=4 -D NR=8 -o src/f32-igemm/gen/4x8s4-sse.c
101### MRx2 micro-kernels
102tools/xngen src/f32-igemm/MRx2c4-sse.c.in -D MR=4 -D NR=2 -o src/f32-igemm/gen/4x2c4-sse.c
103
104################################### x86 AVX ###################################
105### AVX+BROADCAST micro-kernels
106tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/1x8-avx-broadcast.c
107tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/4x8-avx-broadcast.c
108tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/5x8-avx-broadcast.c
109tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/6x8-avx-broadcast.c
110tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=0 -o src/f32-igemm/gen/7x8-avx-broadcast.c
111tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/1x16-avx-broadcast.c
112tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/3x16-avx-broadcast.c
113tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/4x16-avx-broadcast.c
114tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=0 -o src/f32-igemm/gen/5x16-avx-broadcast.c
115### FMA3+BROADCAST micro-kernels
116tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/1x8-fma3-broadcast.c
117tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/4x8-fma3-broadcast.c
118tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/5x8-fma3-broadcast.c
119tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=6 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/6x8-fma3-broadcast.c
120tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=7 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/7x8-fma3-broadcast.c
121tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=8 -D NR=8 -D FMA=3 -o src/f32-igemm/gen/8x8-fma3-broadcast.c
122tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16-fma3-broadcast.c
123tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16-fma3-broadcast.c
124tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16-fma3-broadcast.c
125tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16-fma3-broadcast.c
126
127tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16s4-fma3-broadcast.c
128tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16s4-fma3-broadcast.c
129tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16s4-fma3-broadcast.c
130tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16s4-fma3-broadcast.c
131
132################################# x86 AVX-512 #################################
133### AVX512F+BROADCAST micro-kernels
134tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -o src/f32-igemm/gen/1x16-avx512f-broadcast.c
135tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=4 -D NR=16 -o src/f32-igemm/gen/4x16-avx512f-broadcast.c
136tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=5 -D NR=16 -o src/f32-igemm/gen/5x16-avx512f-broadcast.c
137tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=6 -D NR=16 -o src/f32-igemm/gen/6x16-avx512f-broadcast.c
138tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=7 -D NR=16 -o src/f32-igemm/gen/7x16-avx512f-broadcast.c
139tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=8 -D NR=16 -o src/f32-igemm/gen/8x16-avx512f-broadcast.c
140
141################################## Unit tests #################################
142tools/generate-gemm-test.py --spec test/f32-igemm.yaml --output test/f32-igemm.cc
143