/external/XNNPACK/src/f32-ppmm/gen/ |
D | 3x3-minmax-scalar.c | in xnn_f32_ppmm_minmax_ukernel_3x3__scalar():
      44  float vacc0x0 = w[0];  (local)
      47  float vacc1x0 = vacc0x0;
      50  float vacc2x0 = vacc0x0;
      67  vacc0x0 += va0 * vb0;
      81  vacc0x0 = math_min_f32(vacc0x0, vmax);
      92  vacc0x0 = math_max_f32(vacc0x0, vmin);
     109  c0[0] = vacc0x0;
     126  c0[0] = vacc0x0;
     131  vacc0x0 = vacc0x2;
     140  *c0 = vacc0x0;
|
D | 2x4-minmax-scalar.c | in xnn_f32_ppmm_minmax_ukernel_2x4__scalar():
      40  float vacc0x0 = w[0];  (local)
      44  float vacc1x0 = vacc0x0;
      62  vacc0x0 += va0 * vb0;
      75  vacc0x0 = math_min_f32(vacc0x0, vmax);
      85  vacc0x0 = math_max_f32(vacc0x0, vmin);
      99  c0[0] = vacc0x0;
     114  c0[0] = vacc0x0;
     118  vacc0x0 = vacc0x2;
     125  *c0 = vacc0x0;
|
D | 4x2-minmax-scalar.c | in xnn_f32_ppmm_minmax_ukernel_4x2__scalar():
      48  float vacc0x0 = w[0];  (local)
      50  float vacc1x0 = vacc0x0;
      52  float vacc2x0 = vacc0x0;
      54  float vacc3x0 = vacc0x0;
      70  vacc0x0 += va0 * vb0;
      83  vacc0x0 = math_min_f32(vacc0x0, vmax);
      93  vacc0x0 = math_max_f32(vacc0x0, vmin);
     109  c0[0] = vacc0x0;
     125  *c0 = vacc0x0;
|
D | 4x4-minmax-scalar.c | in xnn_f32_ppmm_minmax_ukernel_4x4__scalar():
      48  float vacc0x0 = w[0];  (local)
      52  float vacc1x0 = vacc0x0;
      56  float vacc2x0 = vacc0x0;
      60  float vacc3x0 = vacc0x0;
      80  vacc0x0 += va0 * vb0;
     101  vacc0x0 = math_min_f32(vacc0x0, vmax);
     119  vacc0x0 = math_max_f32(vacc0x0, vmin);
     149  c0[0] = vacc0x0;
     170  c0[0] = vacc0x0;
     176  vacc0x0 = vacc0x2;
     [all …]
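
Across these PPMM kernels the hits trace the same accumulator lifecycle: vacc0x0 is initialized from the packed weights w (which carry the bias first), copied into the remaining row accumulators, updated with one multiply-accumulate per k step, clamped with math_min_f32/math_max_f32, and stored to the output row c0. A minimal standalone sketch of that lifecycle, reduced to a 1x1 tile (the real kernels unroll it MRxNR ways); fminf/fmaxf stand in for XNNPACK's math helpers, and all parameter names are illustrative:

#include <math.h>
#include <stddef.h>

// Sketch of the vacc0x0 lifecycle visible in the hits above; not
// XNNPACK's actual kernel, just the same pattern at 1x1 scale.
static void ppmm_1x1_sketch(
    size_t kc,                 // reduction length
    const float* a,            // packed panel of A
    const float* w,            // packed weights, bias first (assumption)
    float* c0,                 // output row 0
    float vmin, float vmax)    // clamping bounds
{
  float vacc0x0 = w[0];        // accumulator starts at the packed bias
  w += 1;
  for (size_t k = 0; k < kc; k++) {
    const float va0 = a[k];
    const float vb0 = w[k];
    vacc0x0 += va0 * vb0;      // one multiply-accumulate per k step
  }
  vacc0x0 = fminf(vacc0x0, vmax);  // clamp to [vmin, vmax]
  vacc0x0 = fmaxf(vacc0x0, vmin);
  c0[0] = vacc0x0;             // store the clamped result
}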
|
/external/XNNPACK/src/f32-spmm/gen/ |
D | 8x1-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_8x1__scalar():
      41  float vacc0x0 = *w++;  (local)
      42  float vacc1x0 = vacc0x0;
      43  float vacc2x0 = vacc0x0;
      44  float vacc3x0 = vacc0x0;
      45  float vacc4x0 = vacc0x0;
      46  float vacc5x0 = vacc0x0;
      47  float vacc6x0 = vacc0x0;
      48  float vacc7x0 = vacc0x0;
      62  vacc0x0 += vi0 * vw0;
      72  float vout0x0 = math_min_f32(vacc0x0, vmax);
     [all …]
|
D | 4x1-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_4x1__scalar():
      41  float vacc0x0 = *w++;  (local)
      42  float vacc1x0 = vacc0x0;
      43  float vacc2x0 = vacc0x0;
      44  float vacc3x0 = vacc0x0;
      54  vacc0x0 += vi0 * vw0;
      60  float vout0x0 = math_min_f32(vacc0x0, vmax);
     130  float vacc0x0 = *w++;  (local)
     131  float vacc1x0 = vacc0x0;
     139  vacc0x0 += vi0 * vw0;
     143  float vout0x0 = math_min_f32(vacc0x0, vmax);
     [all …]
|
D | 8x2-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_8x2__scalar():
      41  float vacc0x0 = *w++;  (local)
      42  float vacc1x0 = vacc0x0;
      43  float vacc2x0 = vacc0x0;
      44  float vacc3x0 = vacc0x0;
      45  float vacc4x0 = vacc0x0;
      46  float vacc5x0 = vacc0x0;
      47  float vacc6x0 = vacc0x0;
      48  float vacc7x0 = vacc0x0;
      71  vacc0x0 += vi0 * vw0;
      89  float vout0x0 = math_min_f32(vacc0x0, vmax);
     [all …]
|
D | 2x1-minmax-scalar.c | in xnn_f32_spmm_minmax_ukernel_2x1__scalar():
      41  float vacc0x0 = *w++;  (local)
      42  float vacc1x0 = vacc0x0;
      50  vacc0x0 += vi0 * vw0;
      54  float vout0x0 = math_min_f32(vacc0x0, vmax);
     104  float vacc0x0 = *w++;  (local)
     111  vacc0x0 += vi0 * vw0;
     114  float vout0x0 = math_min_f32(vacc0x0, vmax);
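
The SPMM hits show the sparse variant of the same pattern: each row accumulator starts from a bias loaded via *w++, the inner loop walks the nonzero weights while an offset map advances the input pointer, and the clamped value lands in a separate vout variable. A sketch for a single output, assuming a diff map of byte offsets as XNNPACK's spmm kernels use; the signature and names are illustrative assumptions:

#include <math.h>
#include <stdint.h>

// Sketch of the SPMM inner loop for one output value (the real kernels
// keep 2/4/8 row accumulators live at once).
static void spmm_1x1_sketch(
    const float* input,
    const float* w,            // bias followed by the nonzero weights
    const int32_t* dmap,       // byte offsets between consecutive nonzeros
    uint32_t nnz,              // number of nonzeros for this output
    float vmin, float vmax,
    float* out)
{
  float vacc0x0 = *w++;        // start from the bias
  for (; nnz != 0; nnz -= 1) {
    const float vi0 = *input;  // input element for this nonzero
    const float vw0 = *w++;    // its weight
    vacc0x0 += vi0 * vw0;
    input = (const float*) ((uintptr_t) input + (uintptr_t) (intptr_t) *dmap++);
  }
  float vout0x0 = fminf(vacc0x0, vmax);  // clamp, as math_min/max_f32 do
  vout0x0 = fmaxf(vout0x0, vmin);
  *out = vout0x0;
}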
|
/external/XNNPACK/src/bf16-gemm/gen/ |
D | 1x4c8-minmax-neonbf16-bfmlal.c | in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal():
      44  …float32x4_t vacc0x0 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w…  (local)
      58  vacc0x0 = vbfmlalbq_f32(vacc0x0, va0, vb0);
      63  vacc0x0 = vbfmlaltq_f32(vacc0x0, va0, vb0);
      82  vacc0x0 = vbfmlalbq_f32(vacc0x0, va0x0, vb0);
      83  vacc0x0 = vbfmlaltq_f32(vacc0x0, va0x0, vb0);
      96  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
     101  const float32x2_t vsum0x0 = vadd_f32(vget_low_f32(vacc0x0), vget_high_f32(vacc0x0));
|
D | 1x4c8-minmax-neonfma-zip.c | in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip():
      45  …float32x4_t vacc0x0 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); …  (local)
      66  vacc0x0 = vfmaq_f32(vacc0x0, va0e, vb0e);
      78  vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
     111  vacc0x0 = vfmaq_f32(vacc0x0, va0x0e, vb0e);
     126  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
     133  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
     138  const float32x2_t vsum0x0 = vadd_f32(vget_low_f32(vacc0x0), vget_high_f32(vacc0x0));
|
D | 2x4c8-minmax-neonbf16-bfmlal.c | in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal():
      50  …float32x4_t vacc0x0 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w…  (local)
      54  float32x4_t vacc1x0 = vacc0x0;
      69  vacc0x0 = vbfmlalbq_f32(vacc0x0, va0, vb0);
      78  vacc0x0 = vbfmlaltq_f32(vacc0x0, va0, vb0);
     102  vacc0x0 = vbfmlalbq_f32(vacc0x0, va0x0, vb0);
     103  vacc0x0 = vbfmlaltq_f32(vacc0x0, va0x0, vb0);
     128  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
     136  const float32x2_t vsum0x0 = vadd_f32(vget_low_f32(vacc0x0), vget_high_f32(vacc0x0));
|
D | 1x4c8-minmax-neonfma-shland.c | in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland():
      45  …float32x4_t vacc0x0 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); …  (local)
      66  vacc0x0 = vfmaq_f32(vacc0x0, va0e, vb0e);
      78  vacc0x0 = vfmaq_f32(vacc0x0, va0o, vb0o);
     111  vacc0x0 = vfmaq_f32(vacc0x0, va0x0e, vb0e);
     126  vacc0x0 = vfmaq_f32(vacc0x0, va0x0o, vb0o);
     133  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
     138  const float32x2_t vsum0x0 = vadd_f32(vget_low_f32(vacc0x0), vget_high_f32(vacc0x0));
|
D | 1x4c8-minmax-neonbf16-bfdot.c | in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfdot():
      44  …float32x4_t vacc0x0 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w…  (local)
      58  vacc0x0 = vbfdotq_f32(vacc0x0, va0, vb0);
      77  vacc0x0 = vbfdotq_f32(vacc0x0, va0x0, vb0);
      87  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
      92  const float32x2_t vsum0x0 = vadd_f32(vget_low_f32(vacc0x0), vget_high_f32(vacc0x0));
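
Two strategies are visible in this bf16 group: the neonbf16 kernels feed bfloat16 vectors straight into the BFMLALB/BFMLALT and BFDOT instructions via vbfmlalbq_f32/vbfmlaltq_f32/vbfdotq_f32, while the neonfma zip/shland kernels first widen bf16 to f32, exploiting that a bfloat16 value is exactly the high 16 bits of an IEEE float32. A sketch of that widening step, assuming AArch64 NEON; the function and parameter names are illustrative:

#include <arm_neon.h>

// Sketch of the bf16 -> f32 widening used by the neonfma zip/shland
// kernels: shifting four raw u16 lanes left by 16 and reinterpreting
// yields the matching float32 lanes, after which an ordinary vfmaq_f32
// does the accumulation.
static float32x4_t bf16_widen_fma_sketch(
    float32x4_t vacc0x0,   // running float32 accumulator
    const uint16_t* a,     // four bf16 values stored as raw uint16_t
    const uint16_t* b)
{
  const float32x4_t va = vreinterpretq_f32_u32(vshll_n_u16(vld1_u16(a), 16));
  const float32x4_t vb = vreinterpretq_f32_u32(vshll_n_u16(vld1_u16(b), 16));
  return vfmaq_f32(vacc0x0, va, vb);  // vacc0x0 += va * vb (fused)
}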
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c2-minmax-wasm-2x.c | in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x():
      52  float vacc0x0 = i0[0];  (local)
      62  vacc0x0 = vacc0x0 * vscale0 + vbias0;
      67  vacc0x0 = __builtin_wasm_max_f32(vacc0x0, vmin);
      72  vacc0x0 = __builtin_wasm_min_f32(vacc0x0, vmax);
      77  o0[0] = vacc0x0;
|
D | c2-minmax-scalar-2x.c | in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x():
      52  float vacc0x0 = i0[0];  (local)
      62  vacc0x0 = vacc0x0 * vscale0 + vbias0;
      67  vacc0x0 = math_max_f32(vacc0x0, vmin);
      72  vacc0x0 = math_min_f32(vacc0x0, vmax);
      77  o0[0] = vacc0x0;
|
D | c4-minmax-scalar-2x.c | in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x():
      54  float vacc0x0 = i0[0];  (local)
      70  vacc0x0 = vacc0x0 * vscale0 + vbias0;
      79  vacc0x0 = math_max_f32(vacc0x0, vmin);
      88  vacc0x0 = math_min_f32(vacc0x0, vmax);
      97  o0[0] = vacc0x0;
|
D | c4-minmax-wasm-2x.c | in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x():
      54  float vacc0x0 = i0[0];  (local)
      70  vacc0x0 = vacc0x0 * vscale0 + vbias0;
      79  vacc0x0 = __builtin_wasm_max_f32(vacc0x0, vmin);
      88  vacc0x0 = __builtin_wasm_min_f32(vacc0x0, vmax);
      97  o0[0] = vacc0x0;
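
All four vmulcaddc variants apply the same per-channel affine step to vacc0x0: multiply by a packed scale, add a packed bias, then clamp (math_max_f32/math_min_f32 in the scalar kernels, __builtin_wasm_max_f32/__builtin_wasm_min_f32 in the wasm ones). A sketch of that step for one row; the interleaved {scale, bias} packing of w here is a simplifying assumption, not XNNPACK's actual weight layout:

#include <math.h>
#include <stddef.h>

// Sketch of the vmulcaddc per-element step: y = clamp(x*scale + bias).
// fminf/fmaxf stand in for the scalar and wasm clamp helpers.
static void vmulcaddc_row_sketch(
    size_t channels,
    const float* i0,           // one input row
    const float* w,            // assumed interleaved {scale, bias} pairs
    float* o0,                 // one output row
    float vmin, float vmax)
{
  for (size_t c = 0; c < channels; c++) {
    const float vscale0 = w[2 * c];
    const float vbias0 = w[2 * c + 1];
    float vacc0x0 = i0[c];
    vacc0x0 = vacc0x0 * vscale0 + vbias0;  // multiply-add step
    vacc0x0 = fmaxf(vacc0x0, vmin);        // clamp from below
    vacc0x0 = fminf(vacc0x0, vmax);        // clamp from above
    o0[c] = vacc0x0;
  }
}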
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x1c4-minmax-fp32-armsimd32.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32():
      44  int32_t vacc0x0 = ((const int32_t*) w)[0];  (local)
      57  vacc0x0 = __smlad(va0c02, vb0c02, vacc0x0);
      60  vacc0x0 = __smlad(va0c13, vb0c13, vacc0x0);
      65  float vfpacc0x0 = (float) vacc0x0;
|
D | 2x1c4-minmax-fp32-armsimd32.c | in xnn_qs8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32():
      50  int32_t vacc0x0 = ((const int32_t*) w)[0];  (local)
      51  int32_t vacc1x0 = vacc0x0;
      67  vacc0x0 = __smlad(va0c02, vb0c02, vacc0x0);
      71  vacc0x0 = __smlad(va0c13, vb0c13, vacc0x0);
      77  float vfpacc0x0 = (float) vacc0x0;
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 1x1c4-minmax-fp32-armsimd32.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32():
      43  int32_t vacc0x0 = ((const int32_t*) w)[0];  (local)
      56  vacc0x0 = __smlad(va0c02, vb0c02, vacc0x0);
      59  vacc0x0 = __smlad(va0c13, vb0c13, vacc0x0);
      64  float vfpacc0x0 = (float) vacc0x0;
|
D | 2x1c4-minmax-fp32-armsimd32.c | in xnn_qc8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32():
      49  int32_t vacc0x0 = ((const int32_t*) w)[0];  (local)
      50  int32_t vacc1x0 = vacc0x0;
      66  vacc0x0 = __smlad(va0c02, vb0c02, vacc0x0);
      70  vacc0x0 = __smlad(va0c13, vb0c13, vacc0x0);
      76  float vfpacc0x0 = (float) vacc0x0;
|
/external/XNNPACK/src/qu8-gemm/gen/ |
D | 1x1c4-minmax-fp32-armsimd32.c | in xnn_qu8_gemm_minmax_fp32_ukernel_1x1c4__armsimd32():
      45  int32_t vacc0x0 = ((const int32_t*) w)[0];  (local)
      58  vacc0x0 = __smlad(va0c02, vb0c02, vacc0x0);
      61  vacc0x0 = __smlad(va0c13, vb0c13, vacc0x0);
      66  float vfpacc0x0 = (float) vacc0x0;
|
D | 2x1c4-minmax-fp32-armsimd32.c | in xnn_qu8_gemm_minmax_fp32_ukernel_2x1c4__armsimd32():
      51  int32_t vacc0x0 = ((const int32_t*) w)[0];  (local)
      52  int32_t vacc1x0 = vacc0x0;
      68  vacc0x0 = __smlad(va0c02, vb0c02, vacc0x0);
      72  vacc0x0 = __smlad(va0c13, vb0c13, vacc0x0);
      78  float vfpacc0x0 = (float) vacc0x0;
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x1c4-minmax-fp32-armsimd32.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32():
      50  int32_t vacc0x0 = ((const int32_t*) w)[0];  (local)
      72  vacc0x0 = __smlad(va0c02, vb0c02, vacc0x0);
      75  vacc0x0 = __smlad(va0c13, vb0c13, vacc0x0);
      82  float vfpacc0x0 = (float) vacc0x0;
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 1x1c4-minmax-fp32-armsimd32.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32():
      49  int32_t vacc0x0 = ((const int32_t*) w)[0];  (local)
      71  vacc0x0 = __smlad(va0c02, vb0c02, vacc0x0);
      74  vacc0x0 = __smlad(va0c13, vb0c13, vacc0x0);
      81  float vfpacc0x0 = (float) vacc0x0;
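
Every armsimd32 kernel in the qs8/qc8/qu8 groups accumulates with __smlad, the Armv6 SIMD32 dual 16x16 multiply-accumulate: each operand packs two 16-bit lanes into one 32-bit register, and the instruction adds both lane products to the accumulator at once (the va0c02/vb0c02 and va0c13/vb0c13 pairs hold channels {0,2} and {1,3}). The trailing hit in each file, float vfpacc0x0 = (float) vacc0x0, is where fp32 requantization begins. A sketch of the accumulation step, with a portable branch spelling out the same arithmetic; the function name is illustrative:

#include <stdint.h>
#if defined(__ARM_FEATURE_SIMD32)
#include <arm_acle.h>
#endif

// Sketch of the __smlad step shared by the armsimd32 kernels above:
// acc + x.lo*y.lo + x.hi*y.hi, on two packed int16 lanes per operand.
static int32_t smlad_step_sketch(int32_t va, int32_t vb, int32_t vacc0x0)
{
#if defined(__ARM_FEATURE_SIMD32)
  return __smlad(va, vb, vacc0x0);   // single SMLAD instruction
#else
  const int32_t a_lo = (int16_t) (va & 0xFFFF);  // sign-extend low lane
  const int32_t a_hi = va >> 16;                 // high lane
  const int32_t b_lo = (int16_t) (vb & 0xFFFF);
  const int32_t b_hi = vb >> 16;
  return vacc0x0 + a_lo * b_lo + a_hi * b_hi;    // same dual MAC, portably
#endif
}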
|