• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 #include <xnnpack/common.h>
11 #include <xnnpack/params.h>
12 
13 #ifdef __cplusplus
14 extern "C" {
15 #endif
16 
17 #define DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(fn_name) \
18   XNN_INTERNAL void fn_name(const uint64_t* input,      \
19                             uint64_t* output,           \
20                             size_t input_stride,        \
21                             size_t output_stride,       \
22                             size_t block_width,         \
23                             size_t block_height);
24 
25 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__1x2_scalar_int)
26 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x1_scalar_int)
27 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x2_scalar_int)
28 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__4x1_scalar_int)
29 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__4x2_scalar_int)
30 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__1x2_scalar_float)
31 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x1_scalar_float)
32 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x2_scalar_float)
33 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__4x1_scalar_float)
34 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__4x2_scalar_float)
35 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2)
36 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2)
37 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2)
38 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2)
39 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2)
40 DECLARE_X64_TRANSPOSE_UKERNEL_FUNCTION(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2)
41 
42 #define DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(fn_name) \
43   XNN_INTERNAL void fn_name(const uint32_t* input,      \
44                             uint32_t* output,           \
45                             size_t input_stride,        \
46                             size_t output_stride,       \
47                             size_t block_width,         \
48                             size_t block_height);
49 
50 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__1x2_scalar_int)
51 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__1x4_scalar_int)
52 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__2x1_scalar_int)
53 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__2x2_scalar_int)
54 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__2x4_scalar_int)
55 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x1_scalar_int)
56 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x2_scalar_int)
57 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_scalar_int)
58 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__1x2_scalar_float)
59 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__1x4_scalar_float)
60 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__2x1_scalar_float)
61 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__2x2_scalar_float)
62 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__2x4_scalar_float)
63 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x1_scalar_float)
64 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x2_scalar_float)
65 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_scalar_float)
66 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl)
67 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_sse)
68 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_wasmsimd)
69 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon)
70 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon)
71 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon)
72 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon)
73 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon)
74 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon)
75 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon)
76 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon)
77 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2)
78 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2)
79 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2)
80 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2)
81 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2)
82 DECLARE_X32_TRANSPOSE_UKERNEL_FUNCTION(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2)
83 
84 #define DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(fn_name) \
85   XNN_INTERNAL void fn_name(const uint16_t* input,      \
86                             uint16_t* output,           \
87                             size_t input_stride,        \
88                             size_t output_stride,       \
89                             size_t block_width,         \
90                             size_t block_height);
91 
92 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__1x2_scalar_int)
93 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__1x4_scalar_int)
94 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__2x1_scalar_int)
95 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__2x2_scalar_int)
96 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__2x4_scalar_int)
97 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__4x1_scalar_int)
98 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__4x2_scalar_int)
99 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__4x4_scalar_int)
100 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon)
101 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon)
102 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon)
103 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon)
104 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon)
105 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon)
106 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon)
107 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__4x8_sse2)
108 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2)
109 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2)
110 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2)
111 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2)
112 DECLARE_X16_TRANSPOSE_UKERNEL_FUNCTION(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2)
113 
114 #define DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(fn_name)  \
115   XNN_INTERNAL void fn_name(const uint8_t* input,       \
116                             uint8_t* output,            \
117                             size_t input_stride,        \
118                             size_t output_stride,       \
119                             size_t block_width,         \
120                             size_t block_height);
121 
122 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__1x2_scalar_int)
123 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__1x4_scalar_int)
124 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__2x1_scalar_int)
125 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__2x2_scalar_int)
126 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__2x4_scalar_int)
127 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__4x1_scalar_int)
128 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__4x2_scalar_int)
129 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__4x4_scalar_int)
130 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon)
131 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon)
132 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon)
133 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon)
134 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2)
135 DECLARE_X8_TRANSPOSE_UKERNEL_FUNCTION(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2)
136 
137 #ifdef __cplusplus
138 }  // extern "C"
139 #endif
140