/external/XNNPACK/src/f32-dwconv/gen/ |
D | up4x25-minmax-wasmsimd-x86.c | 165 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() local 172 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 178 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 184 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 190 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 196 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 202 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 208 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 214 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() [all …]
|
D | up4x25-minmax-sse.c | 165 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local 172 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 178 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 184 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 190 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 196 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 202 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 208 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 214 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 220 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() [all …]
|
D | up4x25-wasmsimd.c | 163 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() local 170 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() 176 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() 182 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() 188 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() 194 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() 200 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() 206 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() 212 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() 218 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() [all …]
|
D | up4x25-minmax-neonfma.c | 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() local 171 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 175 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 179 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 183 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 187 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 191 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 195 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 199 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 203 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() [all …]
|
D | up4x25-minmax-wasmsimd-arm.c | 165 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() local 172 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() 178 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() 184 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() 190 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() 196 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() 202 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() 208 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() 214 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() 220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() [all …]
|
D | up4x25-minmax-neon.c | 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() local 171 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 175 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 179 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 183 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 187 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 191 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 195 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 199 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 203 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() [all …]
|
D | up8x25-minmax-wasmsimd-x86.c | 165 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() local 175 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 184 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 193 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 202 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 211 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 229 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 238 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 247 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() [all …]
|
D | up8x25-minmax-sse.c | 165 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() local 175 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 184 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 193 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 202 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 211 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 220 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 229 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 238 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 247 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() [all …]
|
D | up8x25-minmax-neon.c | 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() local 174 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 181 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 188 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 195 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 202 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 209 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 216 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 223 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 230 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() [all …]
|
D | up8x25-minmax-neonfma.c | 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() local 174 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 181 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 188 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 195 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 202 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 209 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 216 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 223 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 230 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() [all …]
|
D | up8x25-wasmsimd.c | 163 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() local 173 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 182 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 191 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 200 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 209 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 218 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 227 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 236 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 245 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() [all …]
|
D | up8x25-minmax-wasmsimd-arm.c | 165 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() local 175 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 184 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 193 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 202 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 211 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 229 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 238 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 247 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() [all …]
|
D | up8x9-minmax-wasmsimd-x86.c | 85 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 95 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 113 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 131 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 149 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 167 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() [all …]
|
D | up8x9-minmax-neonfma.c | 86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 94 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 101 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 108 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 115 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 122 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 129 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 136 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 143 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 150 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() [all …]
|
D | up8x9-minmax-sse.c | 85 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 95 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 104 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 113 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 122 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 131 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 140 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 149 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 158 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 167 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() [all …]
|
D | up8x9-minmax-wasmsimd-arm.c | 85 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 95 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 113 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 131 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 149 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 167 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() [all …]
|
D | up8x9-minmax-neon.c | 86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 94 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 101 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 108 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 115 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 122 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 129 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 136 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 143 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 150 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() [all …]
|
D | up8x9-wasmsimd.c | 83 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 93 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 102 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 111 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 120 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 129 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 138 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 147 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 156 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 165 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() [all …]
|
D | up4x9-minmax-wasmsimd-x86.c | 85 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() local 92 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 98 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 110 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 116 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 128 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 134 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() [all …]
|
D | up4x9-wasmsimd.c | 83 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local 90 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 96 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 102 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 108 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 114 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 120 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 126 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 132 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 138 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() [all …]
|
D | up4x9-minmax-wasmsimd-arm.c | 85 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local 92 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 98 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 110 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 116 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 128 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 134 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() [all …]
|
D | up4x9-minmax-sse.c | 85 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() local 92 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 98 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 104 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 110 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 116 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 122 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 128 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 140 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() [all …]
|
D | up4x9-minmax-neon.c | 86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local 91 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 95 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 99 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 103 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 107 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 111 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 115 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 119 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 123 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() [all …]
|
D | up4x9-minmax-neonfma.c | 86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() local 91 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 95 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 99 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 103 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 107 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 111 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 115 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 119 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 123 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() [all …]
|
D | up8x4-minmax-wasmsimd-x86.c | 60 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local 70 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 79 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 88 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 97 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 103 v128_t vacc0123 = wasm_v128_bitselect(vmin, vacc0123p0, wasm_f32x4_lt(vacc0123p0, vmin)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 114 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local 120 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 126 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 132 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() [all …]
|