• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <xnnpack/common.h>
12 
13 #ifdef __cplusplus
14 extern "C" {
15 #endif
16 
17 #define DECLARE_XX_TRANSPOSEV_UKERNEL_FUNCTION(fn_name)   \
18   XNN_INTERNAL void fn_name(const void* input,            \
19                             void* output,                 \
20                             size_t input_row_stride,      \
21                             size_t output_row_stride,     \
22                             size_t input_element_stride,  \
23                             size_t output_element_stride, \
24                             size_t element_size,          \
25                             size_t block_width,           \
26                             size_t block_height);
27 
28 DECLARE_XX_TRANSPOSEV_UKERNEL_FUNCTION(xnn_xx_transposev_ukernel__1x1_memcpy)
29 
30 #define DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(fn_name) \
31   XNN_INTERNAL void fn_name(const uint64_t* input,      \
32                             uint64_t* output,           \
33                             size_t input_stride,        \
34                             size_t output_stride,       \
35                             size_t block_width,         \
36                             size_t block_height);
37 
38 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__1x2_scalar_float)
39 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__1x2_scalar_int)
40 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x1_scalar_float)
41 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x1_scalar_int)
42 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x2_multi_mov_sse2)
43 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x2_multi_multi_sse2)
44 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x2_multi_switch_sse2)
45 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x2_reuse_mov_sse2)
46 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x2_reuse_multi_sse2)
47 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x2_reuse_switch_sse2)
48 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x2_scalar_float)
49 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__2x2_scalar_int)
50 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__4x1_scalar_float)
51 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__4x1_scalar_int)
52 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__4x2_scalar_float)
53 DECLARE_X64_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x64_transposec_ukernel__4x2_scalar_int)
54 
55 #define DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(fn_name) \
56   XNN_INTERNAL void fn_name(const uint32_t* input,      \
57                             uint32_t* output,           \
58                             size_t input_stride,        \
59                             size_t output_stride,       \
60                             size_t block_width,         \
61                             size_t block_height);
62 
63 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__1x2_scalar_float)
64 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__1x2_scalar_int)
65 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__1x4_scalar_float)
66 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__1x4_scalar_int)
67 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x1_scalar_float)
68 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x1_scalar_int)
69 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_multi_dec_zip_neon)
70 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_multi_mov_zip_neon)
71 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_multi_multi_zip_neon)
72 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_multi_switch_zip_neon)
73 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_reuse_dec_zip_neon)
74 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_reuse_mov_zip_neon)
75 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_reuse_multi_zip_neon)
76 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_reuse_switch_zip_neon)
77 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_scalar_float)
78 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x2_scalar_int)
79 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x4_scalar_float)
80 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__2x4_scalar_int)
81 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x1_scalar_float)
82 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x1_scalar_int)
83 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x2_scalar_float)
84 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x2_scalar_int)
85 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_aarch64_neon_tbl)
86 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_dec_zip_neon)
87 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_mov_sse2)
88 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_mov_wasmsimd)
89 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_mov_zip_neon)
90 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_multi_sse2)
91 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_multi_wasmsimd)
92 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_multi_zip_neon)
93 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_switch_sse2)
94 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_switch_wasmsimd)
95 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_multi_switch_zip_neon)
96 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_dec_zip_neon)
97 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_mov_sse2)
98 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_mov_wasmsimd)
99 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_mov_zip_neon)
100 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_multi_sse2)
101 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_multi_wasmsimd)
102 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_multi_zip_neon)
103 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_switch_sse2)
104 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_switch_wasmsimd)
105 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_reuse_switch_zip_neon)
106 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_scalar_float)
107 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_scalar_int)
108 DECLARE_X32_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x32_transposec_ukernel__4x4_sse)
109 
110 #define DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(fn_name) \
111   XNN_INTERNAL void fn_name(const void* input,      \
112                             void* output,           \
113                             size_t input_stride,        \
114                             size_t output_stride,       \
115                             size_t block_width,         \
116                             size_t block_height);
117 
118 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__1x2_scalar)
119 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__1x4_scalar)
120 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__2x1_scalar)
121 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__2x2_neon_tbl)
122 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__2x2_scalar)
123 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__2x4_scalar)
124 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__4x1_scalar)
125 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__4x2_scalar)
126 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__4x4_aarch64_neon_tbl)
127 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__4x4_scalar)
128 DECLARE_X24_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x24_transposec_ukernel__4x4_ssse3)
129 
130 #define DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(fn_name) \
131   XNN_INTERNAL void fn_name(const uint16_t* input,      \
132                             uint16_t* output,           \
133                             size_t input_stride,        \
134                             size_t output_stride,       \
135                             size_t block_width,         \
136                             size_t block_height);
137 
138 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__1x2_scalar_int)
139 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__1x4_scalar_int)
140 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__2x1_scalar_int)
141 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__2x2_scalar_int)
142 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__2x4_scalar_int)
143 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x1_scalar_int)
144 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x2_scalar_int)
145 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x4_multi_dec_zip_neon)
146 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x4_multi_mov_zip_neon)
147 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x4_multi_multi_zip_neon)
148 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x4_multi_switch_zip_neon)
149 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x4_reuse_dec_zip_neon)
150 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x4_reuse_mov_zip_neon)
151 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x4_reuse_multi_zip_neon)
152 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x4_reuse_switch_zip_neon)
153 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x4_scalar_int)
154 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__4x8_sse2)
155 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon)
156 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_multi_mov_sse2)
157 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd)
158 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon)
159 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_multi_switch_sse2)
160 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd)
161 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon)
162 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon)
163 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2)
164 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd)
165 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon)
166 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2)
167 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd)
168 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon)
169 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2)
170 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_switch_wasmsimd)
171 DECLARE_X16_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon)
172 
173 #define DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(fn_name)  \
174   XNN_INTERNAL void fn_name(const uint8_t* input,       \
175                             uint8_t* output,            \
176                             size_t input_stride,        \
177                             size_t output_stride,       \
178                             size_t block_width,         \
179                             size_t block_height);
180 
181 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__1x2_scalar_int)
182 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__1x4_scalar_int)
183 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__2x1_scalar_int)
184 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__2x2_scalar_int)
185 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__2x4_scalar_int)
186 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__4x1_scalar_int)
187 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__4x2_scalar_int)
188 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__4x4_scalar_int)
189 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon)
190 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon)
191 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon)
192 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon)
193 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon)
194 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon)
195 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon)
196 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon)
197 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__16x16_reuse_mov_sse2)
198 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__16x16_reuse_mov_wasmsimd)
199 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon)
200 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__16x16_reuse_switch_sse2)
201 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__16x16_reuse_switch_wasmsimd)
202 DECLARE_X8_TRANSPOSEC_UKERNEL_FUNCTION(xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon)
203 
204 #ifdef __cplusplus
205 }  // extern "C"
206 #endif
207