/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x9-minmax-neon-acc2.c | 134 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 136 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 194 const float32x4_t vk6x0123 = vld1q_f32(w + 52); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 195 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 242 const float32x4_t vk6x0123 = vld1q_f32(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 243 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2()
|
D | up8x9-minmax-neonfma.c | 134 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 136 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 191 const float32x4_t vk6x0123 = vld1q_f32(w + 52); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 192 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 237 const float32x4_t vk6x0123 = vld1q_f32(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 238 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
|
D | up8x9-minmax-sse.c | 147 const __m128 vk6x0123 = _mm_load_ps(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 149 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 224 const __m128 vk6x0123 = _mm_load_ps(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 225 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 276 const __m128 vk6x0123 = _mm_load_ps(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 277 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
|
D | up8x9-minmax-sse-acc2.c | 147 const __m128 vk6x0123 = _mm_load_ps(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 149 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 227 const __m128 vk6x0123 = _mm_load_ps(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 228 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 281 const __m128 vk6x0123 = _mm_load_ps(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 282 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2()
|
D | up8x9-minmax-wasmsimd-arm.c | 147 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 149 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 225 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 226 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 277 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 278 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
|
D | up8x9-minmax-neon.c | 134 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 136 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 191 const float32x4_t vk6x0123 = vld1q_f32(w + 52); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 192 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 237 const float32x4_t vk6x0123 = vld1q_f32(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 238 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
|
D | up8x9-minmax-wasmsimd-arm-acc2.c | 147 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 149 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 228 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 229 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 282 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 283 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2()
|
D | up8x9-minmax-neonfma-acc2.c | 134 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 136 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 194 const float32x4_t vk6x0123 = vld1q_f32(w + 52); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 195 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 242 const float32x4_t vk6x0123 = vld1q_f32(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 243 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2()
|
D | up8x9-wasmsimd.c | 145 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 147 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 220 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 221 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 271 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 272 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
|
D | up8x9-minmax-wasmsimd-x86.c | 147 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 149 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 225 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 226 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 277 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 278 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
|
D | up4x9-wasmsimd.c | 125 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local 126 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 176 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local 177 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
|
D | up4x9-minmax-wasmsimd-arm.c | 127 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local 128 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 180 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local 181 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
|
D | up4x9-minmax-sse.c | 127 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() local 128 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() 179 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() local 180 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
|
D | up4x9-minmax-neon.c | 114 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local 115 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 160 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local 161 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
|
D | up4x9-minmax-neonfma.c | 114 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() local 115 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() 160 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() local 161 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
|
D | up8x9-minmax-wasmsimd-x86-acc2.c | 147 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 149 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 228 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 229 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 282 const v128_t vk6x0123 = wasm_v128_load(w + 56); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 283 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2()
|
D | up4x9-minmax-wasmsimd-x86.c | 127 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() local 128 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() 180 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() local 181 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
|
D | up4x9-minmax-sse-acc2.c | 127 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() local 128 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 181 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() local 182 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2()
|
D | up4x9-minmax-neon-acc2.c | 114 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() local 115 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 162 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() local 163 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2()
|
D | up4x9-minmax-wasmsimd-arm-acc2.c | 127 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() local 128 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 182 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() local 183 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2()
|
D | up4x9-minmax-wasmsimd-x86-acc2.c | 127 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() local 128 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 182 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() local 183 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2()
|
D | up4x9-minmax-neonfma-acc2.c | 114 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() local 115 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 162 const float32x4_t vk6x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() local 163 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2()
|
D | up4x25-minmax-wasmsimd-x86.c | 207 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() local 208 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 356 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() local 357 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
|
D | up4x25-minmax-wasmsimd-x86-acc2.c | 207 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() local 208 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 358 const v128_t vk6x0123 = wasm_v128_load(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() local 359 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2()
|
D | up4x25-minmax-sse.c | 207 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local 208 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 355 const __m128 vk6x0123 = _mm_load_ps(w + 28); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local 356 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
|