/external/XNNPACK/src/qc8-gemm/gen/ |
D | 8x8c4-minmax-fp32-neondot.c | 224 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot() local 225 vfpacc0x4567 = vmulq_f32(vfpacc0x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot() 226 vfpacc1x4567 = vmulq_f32(vfpacc1x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot() 227 vfpacc2x4567 = vmulq_f32(vfpacc2x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot() 228 vfpacc3x4567 = vmulq_f32(vfpacc3x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot() 229 vfpacc4x4567 = vmulq_f32(vfpacc4x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot() 230 vfpacc5x4567 = vmulq_f32(vfpacc5x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot() 231 vfpacc6x4567 = vmulq_f32(vfpacc6x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot() 232 vfpacc7x4567 = vmulq_f32(vfpacc7x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot()
|
D | 6x8c4-minmax-fp32-neondot.c | 186 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot() local 187 vfpacc0x4567 = vmulq_f32(vfpacc0x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot() 188 vfpacc1x4567 = vmulq_f32(vfpacc1x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot() 189 vfpacc2x4567 = vmulq_f32(vfpacc2x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot() 190 vfpacc3x4567 = vmulq_f32(vfpacc3x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot() 191 vfpacc4x4567 = vmulq_f32(vfpacc4x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot() 192 vfpacc5x4567 = vmulq_f32(vfpacc5x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot()
|
D | 4x8c4-minmax-fp32-neondot.c | 148 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot() local 149 vfpacc0x4567 = vmulq_f32(vfpacc0x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot() 150 vfpacc1x4567 = vmulq_f32(vfpacc1x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot() 151 vfpacc2x4567 = vmulq_f32(vfpacc2x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot() 152 vfpacc3x4567 = vmulq_f32(vfpacc3x4567, vscale4567); in xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 8x8c4-minmax-fp32-neondot.c | 248 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot() local 249 vfpacc0x4567 = vmulq_f32(vfpacc0x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot() 250 vfpacc1x4567 = vmulq_f32(vfpacc1x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot() 251 vfpacc2x4567 = vmulq_f32(vfpacc2x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot() 252 vfpacc3x4567 = vmulq_f32(vfpacc3x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot() 253 vfpacc4x4567 = vmulq_f32(vfpacc4x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot() 254 vfpacc5x4567 = vmulq_f32(vfpacc5x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot() 255 vfpacc6x4567 = vmulq_f32(vfpacc6x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot() 256 vfpacc7x4567 = vmulq_f32(vfpacc7x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot()
|
D | 6x8c4-minmax-fp32-neondot.c | 206 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot() local 207 vfpacc0x4567 = vmulq_f32(vfpacc0x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot() 208 vfpacc1x4567 = vmulq_f32(vfpacc1x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot() 209 vfpacc2x4567 = vmulq_f32(vfpacc2x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot() 210 vfpacc3x4567 = vmulq_f32(vfpacc3x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot() 211 vfpacc4x4567 = vmulq_f32(vfpacc4x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot() 212 vfpacc5x4567 = vmulq_f32(vfpacc5x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot()
|
D | 4x8c4-minmax-fp32-neondot.c | 164 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot() local 165 vfpacc0x4567 = vmulq_f32(vfpacc0x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot() 166 vfpacc1x4567 = vmulq_f32(vfpacc1x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot() 167 vfpacc2x4567 = vmulq_f32(vfpacc2x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot() 168 vfpacc3x4567 = vmulq_f32(vfpacc3x4567, vscale4567); in xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot()
|
/external/XNNPACK/src/qc8-dwconv/gen/ |
D | up8x3-minmax-fp32-neonv8-mla8-ld64.c | 84 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64() local 87 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64() 141 …const float32x4_t vscale4567 = vld1q_f32((const float*) ((uintptr_t) w + 0 * sizeof(int32_t) + 24 … in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64() local 143 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64()
|
D | up8x3-minmax-fp32-neon-mla8-ld64.c | 84 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64() local 87 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64() 142 …const float32x4_t vscale4567 = vld1q_f32((const float*) ((uintptr_t) w + 0 * sizeof(int32_t) + 24 … in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64() local 144 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64()
|
D | up8x3-minmax-fp32-sse41-mul16.c | 100 const __m128 vscale4567 = _mm_loadu_ps((const float*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() local 103 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() 168 …const __m128 vscale4567 = _mm_loadu_ps((const float*) ((uintptr_t) w + 8 * sizeof(int32_t) + 24 * … in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16() local 170 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse41_mul16()
|
D | up8x3-minmax-fp32-sse2-mul16.c | 103 const __m128 vscale4567 = _mm_loadu_ps((const float*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() local 106 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() 174 …const __m128 vscale4567 = _mm_loadu_ps((const float*) ((uintptr_t) w + 8 * sizeof(int32_t) + 24 * … in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16() local 176 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__sse2_mul16()
|
D | up16x3-minmax-fp32-neon-mla8-ld128.c | 95 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128() local 100 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128() 164 …const float32x4_t vscale4567 = vld1q_f32((const float*) ((uintptr_t) w + 8 * sizeof(int32_t) + 48 … in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128() local 166 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld128()
|
D | up16x3-minmax-fp32-neonv8-mla8-ld64.c | 101 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64() local 106 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64() 169 …const float32x4_t vscale4567 = vld1q_f32((const float*) ((uintptr_t) w + 8 * sizeof(int32_t) + 48 … in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64() local 171 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld64()
|
D | up16x3-minmax-fp32-neonv8-mla8-ld128.c | 95 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128() local 100 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128() 163 …const float32x4_t vscale4567 = vld1q_f32((const float*) ((uintptr_t) w + 8 * sizeof(int32_t) + 48 … in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128() local 165 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neonv8_mla8_ld128()
|
D | up16x3-minmax-fp32-neon-mla8-ld64.c | 101 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64() local 106 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64() 170 …const float32x4_t vscale4567 = vld1q_f32((const float*) ((uintptr_t) w + 8 * sizeof(int32_t) + 48 … in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64() local 172 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__neon_mla8_ld64()
|
D | up16x3-minmax-fp32-wasmsimd-mul16-add16.c | 106 const v128_t vscale4567 = wasm_v128_load((const float*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16() local 112 vacc4567 = wasm_f32x4_mul(vacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16() 184 …const v128_t vscale4567 = wasm_v128_load((const float*) ((uintptr_t) w + 16 * sizeof(int32_t) + 48… in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16() local 187 vacc4567 = wasm_f32x4_mul(vacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__wasmsimd_mul16_add16()
|
D | up16x3-minmax-fp32-xop-mul16-add16.c | 126 const __m128 vscale4567 = _mm_loadu_ps((const float*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() local 131 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() 206 …const __m128 vscale4567 = _mm_loadu_ps((const float*) ((uintptr_t) w + 16 * sizeof(int32_t) + 48 *… in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16() local 208 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__xop_mul16_add16()
|
D | up16x3-minmax-fp32-avx-mul16-add16.c | 121 const __m128 vscale4567 = _mm_loadu_ps((const float*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() local 126 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() 201 …const __m128 vscale4567 = _mm_loadu_ps((const float*) ((uintptr_t) w + 16 * sizeof(int32_t) + 48 *… in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16() local 203 vscaled4567 = _mm_mul_ps(vscaled4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x3__avx_mul16_add16()
|
D | up8x9-minmax-fp32-neonv8-mla8-ld64.c | 150 …const float32x4_t vscale4567 = vld1q_f32((const float*) w); w = (const void*) ((const float*) w + … in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64() local 153 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64() 243 …const float32x4_t vscale4567 = vld1q_f32((const float*) ((uintptr_t) w + 0 * sizeof(int32_t) + 72 … in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64() local 245 vfpacc4567 = vmulq_f32(vfpacc4567, vscale4567); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mla8_ld64()
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c8-minmax-wasmsimd-arm-2x.c | 52 const v128_t vscale4567 = wasm_v128_load(w + 4); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x() local 65 vacc0x4567 = wasm_f32x4_add(vbias4567, wasm_f32x4_mul(vscale4567, vacc0x4567)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x() 67 vacc1x4567 = wasm_f32x4_add(vbias4567, wasm_f32x4_mul(vscale4567, vacc1x4567)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x()
|
D | c8-minmax-wasmrelaxedsimd-fma-2x.c | 52 const v128_t vscale4567 = wasm_v128_load(w + 4); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x() local 65 vacc0x4567 = __builtin_wasm_fma_f32x4(vbias4567, vscale4567, vacc0x4567); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x() 67 vacc1x4567 = __builtin_wasm_fma_f32x4(vbias4567, vscale4567, vacc1x4567); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x()
|
D | c8-minmax-wasmrelaxedsimd-2x.c | 52 const v128_t vscale4567 = wasm_v128_load(w + 4); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x() local 65 vacc0x4567 = wasm_f32x4_add(vbias4567, wasm_f32x4_mul(vscale4567, vacc0x4567)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x() 67 vacc1x4567 = wasm_f32x4_add(vbias4567, wasm_f32x4_mul(vscale4567, vacc1x4567)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x()
|
D | c8-minmax-wasmsimd-x86-2x.c | 52 const v128_t vscale4567 = wasm_v128_load(w + 4); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x() local 65 vacc0x4567 = wasm_f32x4_add(vbias4567, wasm_f32x4_mul(vscale4567, vacc0x4567)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x() 67 vacc1x4567 = wasm_f32x4_add(vbias4567, wasm_f32x4_mul(vscale4567, vacc1x4567)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x()
|
D | c8-minmax-neonfma-2x.c | 52 const float32x4_t vscale4567 = vld1q_f32(w); w += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x() local 64 vacc0x4567 = vfmaq_f32(vbias4567, vscale4567, vacc0x4567); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x() 66 vacc1x4567 = vfmaq_f32(vbias4567, vscale4567, vacc1x4567); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
|
D | c8-minmax-neon-2x.c | 52 const float32x4_t vscale4567 = vld1q_f32(w); w += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x() local 60 vacc0x4567 = vmulq_f32(vacc0x4567, vscale4567); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x() 62 vacc1x4567 = vmulq_f32(vacc1x4567, vscale4567); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
|
D | c8-minmax-sse-2x.c | 52 const __m128 vscale4567 = _mm_load_ps(w + 4); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x() local 62 vacc0x4567 = _mm_mul_ps(vacc0x4567, vscale4567); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x() 64 vacc1x4567 = _mm_mul_ps(vacc1x4567, vscale4567); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x()
|