Home
last modified time | relevance | path

Searched refs:output_decrement (Results 1 – 25 of 107) sorted by relevance

12345

/external/XNNPACK/src/f32-spmm/gen/
D12x1-minmax-neonfma.c34 size_t output_decrement = output_stride * nc - 12 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() local
71 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
76 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
105 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
108 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
131 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
134 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
157 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
160 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
183 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
D12x1-minmax-neon.c34 size_t output_decrement = output_stride * nc - 12 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon() local
71 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
76 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
105 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
108 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
131 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
134 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
157 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
160 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
183 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
D8x1-minmax-wasmsimd-arm.c34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm() local
63 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm()
68 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm()
92 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm()
95 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm()
119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm()
122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm()
146 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm()
D16x1-minmax-neonfma.c34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() local
77 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
82 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
111 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
114 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
137 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
140 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
163 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
166 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
189 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
D16x1-minmax-neon.c34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon() local
77 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
82 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
111 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
114 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
137 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
140 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
163 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
166 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
189 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
D16x1-minmax-sse.c34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse() local
75 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
80 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
109 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
112 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
135 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
138 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
163 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
166 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
189 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
D16x1-minmax-wasmsimd-arm.c34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() local
75 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
80 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
110 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
113 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
137 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
140 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
164 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
167 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
191 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
D8x1-minmax-wasmsimd-x86.c34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86() local
63 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86()
68 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86()
92 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86()
95 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86()
119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86()
122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86()
146 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86()
D8x1-minmax-neonfma.c34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma() local
65 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma()
70 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma()
93 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma()
96 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma()
119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma()
122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma()
145 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma()
D8x1-minmax-neon.c34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon() local
65 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon()
70 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon()
93 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon()
96 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon()
119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon()
122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon()
145 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon()
D8x1-minmax-wasmsimd-arm-pipelined.c34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined() local
71 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined()
76 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined()
100 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined()
103 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined()
127 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined()
130 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined()
154 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined()
D8x1-minmax-neonfma-pipelined.c34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined() local
70 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined()
75 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined()
100 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined()
103 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined()
128 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined()
131 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined()
156 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined()
D8x1-minmax-sse.c34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__sse() local
63 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__sse()
68 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__sse()
91 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__sse()
94 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__sse()
119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__sse()
122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__sse()
145 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__sse()
D8x1-minmax-neon-pipelined.c34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined() local
70 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined()
75 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined()
100 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined()
103 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined()
128 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined()
131 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined()
156 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined()
D16x1-minmax-neon-pipelined.c34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() local
84 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
89 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
120 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
123 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
148 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
151 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
176 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
179 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
204 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
D16x1-minmax-neonfma-pipelined.c34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() local
84 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
89 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
120 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
123 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
148 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
151 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
176 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
179 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
204 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
D16x1-minmax-wasmsimd-arm-pipelined.c34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() local
85 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
90 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
120 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
123 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
147 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
150 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
174 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
177 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
201 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
D32x1-minmax-neon.c34 size_t output_decrement = output_stride * nc - 32 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon() local
102 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
107 output_decrement += 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
148 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
151 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
180 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
183 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
206 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
209 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
232 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
[all …]
D32x1-minmax-neonfma.c34 size_t output_decrement = output_stride * nc - 32 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() local
102 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
107 output_decrement += 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
148 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
151 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
180 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
183 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
206 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
209 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
232 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
[all …]
D8x1-minmax-wasmsimd-x86-pipelined.c34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined() local
71 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined()
76 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined()
100 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined()
103 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined()
127 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined()
130 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined()
154 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined()
/external/XNNPACK/src/f16-spmm/gen/
D32x1-minmax-neonfp16arith.c39 size_t output_decrement = output_stride * nc - 32 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() local
84 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
89 output_decrement += 16 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
118 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
121 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
144 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
147 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
170 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
173 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
196 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
[all …]
D24x1-minmax-neonfp16arith.c39 size_t output_decrement = output_stride * nc - 24 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() local
77 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
82 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
111 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
114 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
137 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
140 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
163 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
166 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
189 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
[all …]
D16x1-minmax-neonfp16arith.c39 size_t output_decrement = output_stride * nc - 16 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() local
70 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
75 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
98 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
101 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
124 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
127 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
150 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
153 output_decrement += 1 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
176 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
D8x1-minmax-neonfp16arith.c39 size_t output_decrement = output_stride * nc - 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith() local
63 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith()
68 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith()
91 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith()
94 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith()
117 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith()
120 output_decrement += 1 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith()
143 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith()
D24x1-minmax-neonfp16arith-x2.c39 size_t output_decrement = output_stride * nc - 24 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() local
107 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
112 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
141 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
144 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
167 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
170 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
193 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
196 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
219 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
[all …]

12345