/external/XNNPACK/src/f32-ppmm/gen/

4x2-minmax-scalar.c  (in xnn_f32_ppmm_minmax_ukernel_4x2__scalar)
    49  float vacc0x1 = w[1];   [local]
    51  float vacc1x1 = vacc0x1;
    53  float vacc2x1 = vacc0x1;
    55  float vacc3x1 = vacc0x1;
    74  vacc0x1 += va0 * vb1;
    87  vacc0x1 = math_min_f32(vacc0x1, vmax);
    97  vacc0x1 = math_max_f32(vacc0x1, vmin);
   110  c0[1] = vacc0x1;

3x3-minmax-scalar.c  (in xnn_f32_ppmm_minmax_ukernel_3x3__scalar)
    45  float vacc0x1 = w[1];   [local]
    48  float vacc1x1 = vacc0x1;
    51  float vacc2x1 = vacc0x1;
    70  vacc0x1 += va0 * vb1;
    84  vacc0x1 = math_min_f32(vacc0x1, vmax);
    95  vacc0x1 = math_max_f32(vacc0x1, vmin);
   110  c0[1] = vacc0x1;
   127  c0[1] = vacc0x1;

2x4-minmax-scalar.c  (in xnn_f32_ppmm_minmax_ukernel_2x4__scalar)
    41  float vacc0x1 = w[1];   [local]
    45  float vacc1x1 = vacc0x1;
    64  vacc0x1 += va0 * vb1;
    77  vacc0x1 = math_min_f32(vacc0x1, vmax);
    87  vacc0x1 = math_max_f32(vacc0x1, vmin);
   100  c0[1] = vacc0x1;
   115  c0[1] = vacc0x1;

4x4-minmax-scalar.c  (in xnn_f32_ppmm_minmax_ukernel_4x4__scalar)
    49  float vacc0x1 = w[1];   [local]
    53  float vacc1x1 = vacc0x1;
    57  float vacc2x1 = vacc0x1;
    61  float vacc3x1 = vacc0x1;
    84  vacc0x1 += va0 * vb1;
   105  vacc0x1 = math_min_f32(vacc0x1, vmax);
   123  vacc0x1 = math_max_f32(vacc0x1, vmin);
   150  c0[1] = vacc0x1;
   171  c0[1] = vacc0x1;
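
Every reference above follows the same scalar minmax pattern: the column-1 accumulator is initialized from the packed bias (w[1]), broadcast to the remaining rows, updated with one multiply-add per k step, clamped, and stored to c0[1]. The sketch below shows that pattern for a single row and two columns, assuming a simplified packing (bias pair followed by one weight pair per k step); it is not the generated xnn_f32_ppmm_* code, and fminf/fmaxf stand in for XNNPACK's math_min_f32/math_max_f32.

```c
#include <stddef.h>
#include <math.h>

// Minimal sketch of the accumulator pattern in the listings above, for one
// output row and two output columns: load the packed bias, multiply-accumulate
// over k, clamp to [vmin, vmax], store.  Hypothetical helper, not XNNPACK code.
static void f32_gemm_1x2_minmax_sketch(
    size_t kc,               // number of k iterations
    const float* a,          // LHS panel: one value per k step
    const float* w,          // packed weights: 2 biases, then 2 values per k step
    float* c,                // output row, 2 elements
    float vmin, float vmax)
{
  float vacc0x0 = w[0];      // bias for column 0
  float vacc0x1 = w[1];      // matches "float vacc0x1 = w[1]" above
  w += 2;
  for (size_t k = 0; k < kc; k++) {
    const float va0 = a[k];
    const float vb0 = w[0];
    const float vb1 = w[1];
    w += 2;
    vacc0x0 += va0 * vb0;
    vacc0x1 += va0 * vb1;    // the "vacc0x1 += va0 * vb1" step
  }
  vacc0x0 = fminf(vacc0x0, vmax);
  vacc0x1 = fminf(vacc0x1, vmax);
  vacc0x0 = fmaxf(vacc0x0, vmin);
  vacc0x1 = fmaxf(vacc0x1, vmin);
  c[0] = vacc0x0;
  c[1] = vacc0x1;
}
```
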
/external/XNNPACK/src/bf16-gemm/gen/

1x4c8-minmax-neonbf16-bfmlal.c  (in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal)
    45  …float32x4_t vacc0x1 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w…   [local]
    59  vacc0x1 = vbfmlalbq_f32(vacc0x1, va0, vb1);
    64  vacc0x1 = vbfmlaltq_f32(vacc0x1, va0, vb1);
    85  vacc0x1 = vbfmlalbq_f32(vacc0x1, va0x1, vb1);
    86  vacc0x1 = vbfmlaltq_f32(vacc0x1, va0x1, vb1);
    96  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
   102  const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1));

1x4c8-minmax-neonfma-zip.c  (in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip)
    46  …float32x4_t vacc0x1 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); …   [local]
    67  vacc0x1 = vfmaq_f32(vacc0x1, va0e, vb1e);
    79  vacc0x1 = vfmaq_f32(vacc0x1, va0o, vb1o);
   112  vacc0x1 = vfmaq_f32(vacc0x1, va0x1e, vb1e);
   127  vacc0x1 = vfmaq_f32(vacc0x1, va0x1o, vb1o);
   133  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
   139  const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1));

2x4c8-minmax-neonbf16-bfmlal.c  (in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal)
    51  …float32x4_t vacc0x1 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w…   [local]
    55  float32x4_t vacc1x1 = vacc0x1;
    71  vacc0x1 = vbfmlalbq_f32(vacc0x1, va0, vb1);
    80  vacc0x1 = vbfmlaltq_f32(vacc0x1, va0, vb1);
   108  vacc0x1 = vbfmlalbq_f32(vacc0x1, va0x1, vb1);
   109  vacc0x1 = vbfmlaltq_f32(vacc0x1, va0x1, vb1);
   128  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
   138  const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1));

1x4c8-minmax-neonfma-shland.c  (in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland)
    46  …float32x4_t vacc0x1 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); …   [local]
    67  vacc0x1 = vfmaq_f32(vacc0x1, va0e, vb1e);
    79  vacc0x1 = vfmaq_f32(vacc0x1, va0o, vb1o);
   112  vacc0x1 = vfmaq_f32(vacc0x1, va0x1e, vb1e);
   127  vacc0x1 = vfmaq_f32(vacc0x1, va0x1o, vb1o);
   133  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
   139  const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1));

1x4c8-minmax-neonbf16-bfdot.c  (in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfdot)
    45  …float32x4_t vacc0x1 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w…   [local]
    59  vacc0x1 = vbfdotq_f32(vacc0x1, va0, vb1);
    79  vacc0x1 = vbfdotq_f32(vacc0x1, va0x1, vb1);
    87  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
    93  const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1));

3x4c8-minmax-neonbf16-bfmlal.c  (in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal)
    57  …float32x4_t vacc0x1 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w…   [local]
    61  float32x4_t vacc1x1 = vacc0x1;
    65  float32x4_t vacc2x1 = vacc0x1;
    83  vacc0x1 = vbfmlalbq_f32(vacc0x1, va0, vb1);
    96  vacc0x1 = vbfmlaltq_f32(vacc0x1, va0, vb1);
   131  vacc0x1 = vbfmlalbq_f32(vacc0x1, va0x1, vb1);
   132  vacc0x1 = vbfmlaltq_f32(vacc0x1, va0x1, vb1);
   160  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
   174  const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1));

2x4c8-minmax-neonfma-shland.c  (in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland)
    52  …float32x4_t vacc0x1 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); …   [local]
    56  float32x4_t vacc1x1 = vacc0x1;
    80  vacc0x1 = vfmaq_f32(vacc0x1, va0e, vb1e);
    97  vacc0x1 = vfmaq_f32(vacc0x1, va0o, vb1o);
   143  vacc0x1 = vfmaq_f32(vacc0x1, va0x1e, vb1e);
   166  vacc0x1 = vfmaq_f32(vacc0x1, va0x1o, vb1o);
   175  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
   185  const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1));

2x4c8-minmax-neonfma-zip.c  (in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip)
    52  …float32x4_t vacc0x1 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); …   [local]
    56  float32x4_t vacc1x1 = vacc0x1;
    80  vacc0x1 = vfmaq_f32(vacc0x1, va0e, vb1e);
    97  vacc0x1 = vfmaq_f32(vacc0x1, va0o, vb1o);
   143  vacc0x1 = vfmaq_f32(vacc0x1, va0x1e, vb1e);
   166  vacc0x1 = vfmaq_f32(vacc0x1, va0x1o, vb1o);
   175  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
   185  const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1));

4x4c8-minmax-neonbf16-bfmlal.c  (in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal)
    63  …float32x4_t vacc0x1 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w…   [local]
    67  float32x4_t vacc1x1 = vacc0x1;
    71  float32x4_t vacc2x1 = vacc0x1;
    75  float32x4_t vacc3x1 = vacc0x1;
    95  vacc0x1 = vbfmlalbq_f32(vacc0x1, va0, vb1);
   112  vacc0x1 = vbfmlaltq_f32(vacc0x1, va0, vb1);
   154  vacc0x1 = vbfmlalbq_f32(vacc0x1, va0x1, vb1);
   155  vacc0x1 = vbfmlaltq_f32(vacc0x1, va0x1, vb1);
   192  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
   210  const float32x2_t vsum0x1 = vadd_f32(vget_low_f32(vacc0x1), vget_high_f32(vacc0x1));
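
The bf16 GEMM kernels above keep one float32x4_t accumulator per output column and reduce it at the end with vpaddq_f32 (or vadd_f32 plus a pairwise add on the halves). The AArch64 sketch below shows the neonfma-style variant under stated assumptions: bf16 values stored as uint16_t are widened to f32 with vshll_n_u16(v, 16), accumulated with vfmaq_f32, and the two column accumulators are folded into a two-lane result. The helper names are hypothetical; the bfmlal/bfdot kernels replace the widening step with bf16 multiply instructions.

```c
#include <arm_neon.h>   // AArch64 NEON; vpaddq_f32 requires A64
#include <stdint.h>

// Widen 4 bf16 values (stored as uint16_t) to f32 by shifting them into the
// upper 16 bits of each 32-bit lane, as the shland/zip kernels do with
// vshll_n_u16 before using ordinary vfmaq_f32.
static inline float32x4_t bf16x4_to_f32(const uint16_t* p) {
  return vreinterpretq_f32_u32(vshll_n_u16(vld1_u16(p), 16));
}

// Hypothetical sketch: dot products of 8 bf16 values for 2 output columns,
// mirroring the per-column accumulators (vacc0x0, vacc0x1) and the final
// pairwise reduction in the 1x4c8 kernels.
static inline float32x2_t bf16_dot8_x2_sketch(
    const uint16_t* a,    // 8 bf16 activations
    const uint16_t* b0,   // 8 bf16 weights, column 0
    const uint16_t* b1)   // 8 bf16 weights, column 1
{
  float32x4_t vacc0x0 = vdupq_n_f32(0.0f);
  float32x4_t vacc0x1 = vdupq_n_f32(0.0f);
  for (int k = 0; k < 8; k += 4) {
    const float32x4_t va0 = bf16x4_to_f32(a + k);
    vacc0x0 = vfmaq_f32(vacc0x0, va0, bf16x4_to_f32(b0 + k));
    vacc0x1 = vfmaq_f32(vacc0x1, va0, bf16x4_to_f32(b1 + k));
  }
  // vpaddq_f32 interleaves pairwise sums of both accumulators; one more
  // pairwise add yields lane 0 = column-0 sum, lane 1 = column-1 sum.
  const float32x4_t vacc0x01 = vpaddq_f32(vacc0x0, vacc0x1);
  return vpadd_f32(vget_low_f32(vacc0x01), vget_high_f32(vacc0x01));
}
```
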
/external/XNNPACK/src/f32-vmulcaddc/gen/

c2-minmax-wasm-2x.c  (in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x)
    53  float vacc0x1 = i0[1];   [local]
    63  vacc0x1 = vacc0x1 * vscale1 + vbias1;
    68  vacc0x1 = __builtin_wasm_max_f32(vacc0x1, vmin);
    73  vacc0x1 = __builtin_wasm_min_f32(vacc0x1, vmax);
    78  o0[1] = vacc0x1;

c2-minmax-scalar-2x.c  (in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x)
    53  float vacc0x1 = i0[1];   [local]
    63  vacc0x1 = vacc0x1 * vscale1 + vbias1;
    68  vacc0x1 = math_max_f32(vacc0x1, vmin);
    73  vacc0x1 = math_min_f32(vacc0x1, vmax);
    78  o0[1] = vacc0x1;

c4-minmax-scalar-2x.c  (in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x)
    55  float vacc0x1 = i0[1];   [local]
    71  vacc0x1 = vacc0x1 * vscale1 + vbias1;
    80  vacc0x1 = math_max_f32(vacc0x1, vmin);
    89  vacc0x1 = math_min_f32(vacc0x1, vmax);
    98  o0[1] = vacc0x1;

c4-minmax-wasm-2x.c  (in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x)
    55  float vacc0x1 = i0[1];   [local]
    71  vacc0x1 = vacc0x1 * vscale1 + vbias1;
    80  vacc0x1 = __builtin_wasm_max_f32(vacc0x1, vmin);
    89  vacc0x1 = __builtin_wasm_min_f32(vacc0x1, vmax);
    98  o0[1] = vacc0x1;
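
The vmulcaddc references above all compute y = x * scale + bias per channel and clamp with vmin/vmax (math_max_f32/math_min_f32 in the scalar kernels, __builtin_wasm_max_f32/__builtin_wasm_min_f32 in the wasm ones). Below is a minimal scalar sketch of that loop with a simplified flat scale/bias layout rather than XNNPACK's packed weights; the helper name is hypothetical.

```c
#include <stddef.h>
#include <math.h>

// Per-channel multiply-add-clamp: y[c] = clamp(x[c] * scale[c] + bias[c],
// vmin, vmax), one row at a time.  Sketch only, not the generated kernel.
static void f32_vmulcaddc_minmax_sketch(
    size_t channels,
    const float* x,       // one input row
    const float* scale,   // per-channel scale (vscale0, vscale1, ... above)
    const float* bias,    // per-channel bias  (vbias0, vbias1, ... above)
    float* y,             // one output row
    float vmin, float vmax)
{
  for (size_t c = 0; c < channels; c++) {
    float vacc = x[c] * scale[c] + bias[c];
    vacc = fmaxf(vacc, vmin);   // math_max_f32 / __builtin_wasm_max_f32
    vacc = fminf(vacc, vmax);   // math_min_f32 / __builtin_wasm_min_f32
    y[c] = vacc;
  }
}
```
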
/external/XNNPACK/src/f32-spmm/gen/

8x2-minmax-scalar.c  (in xnn_f32_spmm_minmax_ukernel_8x2__scalar)
    49  float vacc0x1 = *w++;   [local]
    50  float vacc1x1 = vacc0x1;
    51  float vacc2x1 = vacc0x1;
    52  float vacc3x1 = vacc0x1;
    53  float vacc4x1 = vacc0x1;
    54  float vacc5x1 = vacc0x1;
    55  float vacc6x1 = vacc0x1;
    56  float vacc7x1 = vacc0x1;
    79  vacc0x1 += vi0 * vw1;
    97  float vout0x1 = math_min_f32(vacc0x1, vmax);
   [all …]

8x4-minmax-scalar.c  (in xnn_f32_spmm_minmax_ukernel_8x4__scalar)
    49  float vacc0x1 = *w++;   [local]
    50  float vacc1x1 = vacc0x1;
    51  float vacc2x1 = vacc0x1;
    52  float vacc3x1 = vacc0x1;
    53  float vacc4x1 = vacc0x1;
    54  float vacc5x1 = vacc0x1;
    55  float vacc6x1 = vacc0x1;
    56  float vacc7x1 = vacc0x1;
    97  vacc0x1 += vi0 * vw1;
   131  float vout0x1 = math_min_f32(vacc0x1, vmax);
   [all …]
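
The spmm kernels load one bias per output channel (*w++), broadcast it to the eight accumulators (one per batch row), then add vi * vw for each nonzero weight before clamping. The sketch below illustrates that structure for a single channel; the packed-weight and index encoding is deliberately simplified and does not match XNNPACK's real SpMM packing.

```c
#include <stddef.h>
#include <stdint.h>
#include <math.h>

#define MR 8  // batch rows processed together, as in the 8x2/8x4 kernels

// Sparse-times-dense for one output channel and MR batch rows.  `w` holds the
// channel bias followed by one value per nonzero; `dmap` holds the element
// index of each nonzero.  Sketch only: the real format encodes increments.
static void f32_spmm_1channel_sketch(
    const float* input,      // element j of row r is input[j * MR + r]
    const float* w,          // bias, then nnz weights
    const int32_t* dmap,     // element index of each nonzero
    size_t nnz,
    float* out,              // MR outputs for this channel
    float vmin, float vmax)
{
  float vacc[MR];
  const float vbias = *w++;            // "float vacc0x1 = *w++" in the listings
  for (size_t r = 0; r < MR; r++) {
    vacc[r] = vbias;                   // broadcast bias to all rows
  }
  for (size_t n = 0; n < nnz; n++) {
    const float vw = *w++;
    const float* i = input + (size_t) dmap[n] * MR;
    for (size_t r = 0; r < MR; r++) {
      vacc[r] += i[r] * vw;            // "vacc0x1 += vi0 * vw1"
    }
  }
  for (size_t r = 0; r < MR; r++) {
    out[r] = fmaxf(fminf(vacc[r], vmax), vmin);
  }
}
```
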
/external/XNNPACK/src/qc8-gemm/gen/

1x2c4-minmax-fp32-armsimd32.c  (in xnn_qc8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32)
    44  int32_t vacc0x1 = ((const int32_t*) w)[1];   [local]
    64  vacc0x1 = __smlad(va0c02, vb1c02, vacc0x1);
    67  vacc0x1 = __smlad(va0c13, vb1c13, vacc0x1);
    73  float vfpacc0x1 = (float) vacc0x1;

2x2c4-minmax-fp32-armsimd32.c  (in xnn_qc8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32)
    50  int32_t vacc0x1 = ((const int32_t*) w)[1];   [local]
    52  int32_t vacc1x1 = vacc0x1;
    77  vacc0x1 = __smlad(va0c02, vb1c02, vacc0x1);
    81  vacc0x1 = __smlad(va0c13, vb1c13, vacc0x1);
    88  float vfpacc0x1 = (float) vacc0x1;
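
__smlad is the ARMv6 SIMD32 dual multiply-accumulate: it multiplies the low halfwords and the high halfwords of its first two operands as signed 16-bit values and adds both products to the 32-bit accumulator, which is why each column needs only two __smlad calls per group of four channels. A portable C emulation of that step (the same pattern appears in the qs8 and qu8 listings below):

```c
#include <stdint.h>

// Emulation of the __smlad step above: multiply the low halfwords and the
// high halfwords of x and y as signed 16-bit values and add both products to
// acc.  Assumes x and y each pack two sign-extended channels per 32-bit word.
static inline int32_t smlad_emulated(int32_t x, int32_t y, int32_t acc) {
  const int16_t xlo = (int16_t) (uint32_t) x;          // low halfword
  const int16_t xhi = (int16_t) ((uint32_t) x >> 16);  // high halfword
  const int16_t ylo = (int16_t) (uint32_t) y;
  const int16_t yhi = (int16_t) ((uint32_t) y >> 16);
  return acc + (int32_t) xlo * ylo + (int32_t) xhi * yhi;
}
```
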
/external/XNNPACK/src/qs8-gemm/gen/

1x2c4-minmax-fp32-armsimd32.c  (in xnn_qs8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32)
    45  int32_t vacc0x1 = ((const int32_t*) w)[1];   [local]
    65  vacc0x1 = __smlad(va0c02, vb1c02, vacc0x1);
    68  vacc0x1 = __smlad(va0c13, vb1c13, vacc0x1);
    74  float vfpacc0x1 = (float) vacc0x1;

2x2c4-minmax-fp32-armsimd32.c  (in xnn_qs8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32)
    51  int32_t vacc0x1 = ((const int32_t*) w)[1];   [local]
    53  int32_t vacc1x1 = vacc0x1;
    78  vacc0x1 = __smlad(va0c02, vb1c02, vacc0x1);
    82  vacc0x1 = __smlad(va0c13, vb1c13, vacc0x1);
    89  float vfpacc0x1 = (float) vacc0x1;
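
The va0c02/va0c13 and vb1c02/vb1c13 operands suggest that the four 8-bit channels of each quad are split into two halfword pairs, channels 0 and 2 in one 32-bit word and channels 1 and 3 in the other, before the dual multiply-accumulate. The portable sketch below shows one way to build those pairs; the real armsimd32 kernels use packed byte-extract instructions, so treat this layout as inferred from the identifier names rather than confirmed.

```c
#include <stdint.h>

// Split four signed 8-bit channels into the two halfword pairs consumed by
// the dual multiply-accumulate: channel 0 (resp. 1) in the low halfword and
// channel 2 (resp. 3) in the high halfword, each sign-extended to 16 bits.
static inline void unpack_c02_c13(const int8_t a[4], int32_t* c02, int32_t* c13) {
  *c02 = (int32_t) ((uint32_t) (uint16_t) (int16_t) a[0] |
                    ((uint32_t) (uint16_t) (int16_t) a[2] << 16));
  *c13 = (int32_t) ((uint32_t) (uint16_t) (int16_t) a[1] |
                    ((uint32_t) (uint16_t) (int16_t) a[3] << 16));
}
```
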
/external/XNNPACK/src/qu8-gemm/gen/

1x2c4-minmax-fp32-armsimd32.c  (in xnn_qu8_gemm_minmax_fp32_ukernel_1x2c4__armsimd32)
    46  int32_t vacc0x1 = ((const int32_t*) w)[1];   [local]
    66  vacc0x1 = __smlad(va0c02, vb1c02, vacc0x1);
    69  vacc0x1 = __smlad(va0c13, vb1c13, vacc0x1);
    75  float vfpacc0x1 = (float) vacc0x1;

2x2c4-minmax-fp32-armsimd32.c  (in xnn_qu8_gemm_minmax_fp32_ukernel_2x2c4__armsimd32)
    52  int32_t vacc0x1 = ((const int32_t*) w)[1];   [local]
    54  int32_t vacc1x1 = vacc0x1;
    79  vacc0x1 = __smlad(va0c02, vb1c02, vacc0x1);
    83  vacc0x1 = __smlad(va0c13, vb1c13, vacc0x1);
    90  float vfpacc0x1 = (float) vacc0x1;
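
After accumulation, each minmax-fp32 kernel converts the int32 accumulator to float (the `float vfpacc0x1 = (float) vacc0x1` lines above) and requantizes it in floating point. Below is a hedged sketch of that tail with illustrative parameter names, assuming the clamp bounds are expressed relative to the output zero point; the qu8 kernels produce uint8_t rather than int8_t.

```c
#include <stdint.h>
#include <math.h>

// Sketch of fp32 requantization: scale the int32 accumulator in float, clamp,
// round to nearest, then shift by the output zero point.  The exact ordering
// and parameter layout are illustrative, not copied from the generated code.
static inline int8_t requantize_fp32_sketch(
    int32_t acc, float scale,
    float output_min, float output_max,   // bounds pre-shifted by the zero point
    int8_t output_zero_point)
{
  float vfpacc = (float) acc * scale;     // "(float) vacc0x1" then scaling
  vfpacc = fmaxf(vfpacc, output_min);
  vfpacc = fminf(vfpacc, output_max);
  const int32_t vout = (int32_t) lrintf(vfpacc) + (int32_t) output_zero_point;
  return (int8_t) vout;
}
```
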