/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c1-scalar-2x.c |
      53  const float vbias = w[1];  in xnn_f32_vmulcaddc_ukernel_c1__scalar_2x() local
      55  vacc0 = vacc0 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c1__scalar_2x()
      56  vacc1 = vacc1 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c1__scalar_2x()
|
D | c1-wasm-2x.c |
      53  const float vbias = w[1];  in xnn_f32_vmulcaddc_ukernel_c1__wasm_2x() local
      55  vacc0 = vacc0 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c1__wasm_2x()
      56  vacc1 = vacc1 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c1__wasm_2x()
|
D | c2-scalar-2x.c |
      92  const float vbias = w[1];  in xnn_f32_vmulcaddc_ukernel_c2__scalar_2x() local
      94  vacc0 = vacc0 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c2__scalar_2x()
      95  vacc1 = vacc1 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c2__scalar_2x()
|
D | c2-wasm-2x.c |
      92  const float vbias = w[1];  in xnn_f32_vmulcaddc_ukernel_c2__wasm_2x() local
      94  vacc0 = vacc0 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c2__wasm_2x()
      95  vacc1 = vacc1 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c2__wasm_2x()
|
D | c4-scalar-2x.c |
     116  const float vbias = w[3];  in xnn_f32_vmulcaddc_ukernel_c4__scalar_2x() local
     118  vacc0 = vacc0 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c4__scalar_2x()
     119  vacc1 = vacc1 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c4__scalar_2x()
|
D | c4-wasm-2x.c |
     116  const float vbias = w[3];  in xnn_f32_vmulcaddc_ukernel_c4__wasm_2x() local
     118  vacc0 = vacc0 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c4__wasm_2x()
     119  vacc1 = vacc1 * vscale + vbias;  in xnn_f32_vmulcaddc_ukernel_c4__wasm_2x()
|
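All six generated kernels above share one per-channel affine update, acc = acc * scale + bias, with the scale and bias packed adjacently in the weight array w. A minimal C sketch of the c1/2x variant (the function name and the simplified loop are illustrative, not the kernel verbatim):

    #include <stddef.h>

    /* Sketch of the c1/2x pattern: one channel per iteration, two rows at a time.
       w points at { scale, bias } pairs packed per channel, as in the lines above. */
    static void vmulcaddc_c1_2x_sketch(size_t channels,
                                       const float* x0, const float* x1,
                                       const float* w, float* y0, float* y1) {
      for (size_t c = 0; c < channels; c++) {
        const float vscale = w[0];
        const float vbias  = w[1];       /* matches "const float vbias = w[1]" */
        y0[c] = x0[c] * vscale + vbias;  /* vacc0 = vacc0 * vscale + vbias */
        y1[c] = x1[c] * vscale + vbias;  /* vacc1 = vacc1 * vscale + vbias */
        w += 2;                          /* advance to the next channel's pair */
      }
    }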
/external/XNNPACK/src/f32-vmulcaddc/ |
D | scalar.c.in |
      66  const float vbias${ABC[C]} = w[${C + CHANNEL_TILE}];
      70  vacc${M}x${ABC[C]} = vacc${M}x${ABC[C]} * vscale${ABC[C]} + vbias${ABC[C]};
      94  const float vbias = w[${CHANNEL_TILE - 1}];
      97  vacc${M} = vacc${M} * vscale + vbias;
     118  const float vbias = w[1];
     121  vacc${M} = vacc${M} * vscale + vbias;
|
D | neon.c.in |
      70  const float32x4_t vbias${ABC[C:C+4]} = vld1q_f32(w); w += 4;
      75  vacc${M}x${ABC[C:C+4]} = vaddq_f32(vacc${M}x${ABC[C:C+4]}, vbias${ABC[C:C+4]});
      79  vacc${M}x${ABC[C:C+4]} = vfmaq_f32(vbias${ABC[C:C+4]}, vscale${ABC[C:C+4]}, vacc${M}x${ABC[C:C+4]});
|
D | psimd.c.in |
      68  const psimd_f32 vbias${ABC[C:C+4]} = psimd_load_f32(w + ${C + CHANNEL_TILE});
      72  vacc${M}x${ABC[C:C+4]} = psimd_qfma_f32(vbias${ABC[C:C+4]}, vscale${ABC[C:C+4]}, vacc${M}x${ABC[C:C+4]});
|
D | sse.c.in |
      72  const __m128 vbias${ABC[C:C+4]} = _mm_load_ps(w + ${C + CHANNEL_TILE});
      76  vacc${M}x${ABC[C:C+4]} = _mm_add_ps(vacc${M}x${ABC[C:C+4]}, vbias${ABC[C:C+4]});
|
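The .c.in templates above are the source the generated kernels come from. neon.c.in shows both codegen strategies: with FMA available, the bias becomes the addend of a fused multiply-add (vfmaq_f32(vbias, vscale, vacc) computes vbias + vscale * vacc); without it, the bias is a separate vaddq_f32. A hedged sketch of the two paths for one 4-wide slice (names are illustrative, not the template's variables):

    #include <arm_neon.h>

    /* Sketch of the two strategies in neon.c.in for one 4-wide channel slice. */
    static void mulcaddc_neon_slice_sketch(const float* x, const float* w, float* y) {
      const float32x4_t vscale = vld1q_f32(w);      /* per-channel scales */
      const float32x4_t vbias  = vld1q_f32(w + 4);  /* per-channel biases */
      float32x4_t vacc = vld1q_f32(x);
    #if defined(__aarch64__)
      /* FMA path: vfmaq_f32(a, b, c) computes a + b * c, i.e. bias + scale * acc. */
      vacc = vfmaq_f32(vbias, vscale, vacc);
    #else
      /* Non-FMA path: multiply, then a separate vaddq_f32 for the bias. */
      vacc = vaddq_f32(vmulq_f32(vacc, vscale), vbias);
    #endif
      vst1q_f32(y, vacc);
    }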
/external/XNNPACK/src/q8-avgpool/ |
D | up9-neon.c |
      33  const int32x4_t vbias = vld1q_dup_s32(&params->neon.bias);  in xnn_q8_avgpool_ukernel_up9__neon() local
     101  int32x4_t vacc_lo = vaddw_s16(vbias, vreinterpret_s16_u16(vget_low_u16(vsum)));  in xnn_q8_avgpool_ukernel_up9__neon()
     102  int32x4_t vacc_hi = vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum)));  in xnn_q8_avgpool_ukernel_up9__neon()
     174  int32x4_t vacc_lo = vaddw_s16(vbias, vreinterpret_s16_u16(vget_low_u16(vsum)));  in xnn_q8_avgpool_ukernel_up9__neon()
     175  int32x4_t vacc_hi = vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum)));  in xnn_q8_avgpool_ukernel_up9__neon()
|
D | up9-sse2.c |
      32  const __m128i vbias = _mm_load_si128((const __m128i*) &params->sse2.bias);  in xnn_q8_avgpool_ukernel_up9__sse2() local
     105  const __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));  in xnn_q8_avgpool_ukernel_up9__sse2()
     106  const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));  in xnn_q8_avgpool_ukernel_up9__sse2()
     180  const __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));  in xnn_q8_avgpool_ukernel_up9__sse2()
     181  const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));  in xnn_q8_avgpool_ukernel_up9__sse2()
|
D | up9-scalar.c |
      31  const int32_t vbias = params->scalar.bias;  in xnn_q8_avgpool_ukernel_up9__scalar() local
      95  const int32_t vacc = vbias + (int32_t) vsum;  in xnn_q8_avgpool_ukernel_up9__scalar()
|
D | mp9p8q-scalar.c |
      31  const int32_t vbias = params->scalar.bias;  in xnn_q8_avgpool_ukernel_mp9p8q__scalar() local
      71  int32_t vacc = vbias + (int32_t) vsum2345;  in xnn_q8_avgpool_ukernel_mp9p8q__scalar()
|
D | mp9p8q-neon.c |
      33  const int32x4_t vbias = vld1q_dup_s32(&params->neon.bias);  in xnn_q8_avgpool_ukernel_mp9p8q__neon() local
      77  const int32x4_t vacc_lo = vaddw_s16(vbias, vreinterpret_s16_u16(vget_low_u16(vsum)));  in xnn_q8_avgpool_ukernel_mp9p8q__neon()
      78  const int32x4_t vacc_hi = vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum)));  in xnn_q8_avgpool_ukernel_mp9p8q__neon()
|
D | mp9p8q-sse2.c |
      32  const __m128i vbias = _mm_load_si128((const __m128i*) &params->sse2.bias);  in xnn_q8_avgpool_ukernel_mp9p8q__sse2() local
      81  const __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));  in xnn_q8_avgpool_ukernel_mp9p8q__sse2()
      82  const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));  in xnn_q8_avgpool_ukernel_mp9p8q__sse2()
|
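In both the up9 and mp9p8q SSE2 kernels, the bias enters while the uint16 partial sums are zero-extended to 32 bits: unpacking against zero widens each half of vsum, and the bias is added in the same step. A self-contained sketch of that widening step (the function name and out-pointer style are illustrative):

    #include <emmintrin.h>

    /* Sketch of the SSE2 widening step shared by up9-sse2.c and mp9p8q-sse2.c.
       vsum holds eight uint16 partial sums; vbias holds four copies of the
       int32 bias. Unpacking vsum against zero zero-extends each half. */
    static void avgpool_bias_widen_sketch(const __m128i vsum, const __m128i vbias,
                                          __m128i* vacc_lo, __m128i* vacc_hi) {
      const __m128i vzero = _mm_setzero_si128();
      *vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));
      *vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));
    }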
/external/XNNPACK/src/q8-gavgpool/ |
D | up7-sse2.c |
      55  const __m128i vbias = _mm_load_si128((const __m128i*) &params->sse2.bias);  in xnn_q8_gavgpool_ukernel_up7__sse2() local
      86  __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));  in xnn_q8_gavgpool_ukernel_up7__sse2()
      87  __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));  in xnn_q8_gavgpool_ukernel_up7__sse2()
     155  __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));  in xnn_q8_gavgpool_ukernel_up7__sse2()
     156  __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));  in xnn_q8_gavgpool_ukernel_up7__sse2()
|
D | up7-neon.c |
      56  const int32x4_t vbias = vld1q_dup_s32(&params->neon.bias);  in xnn_q8_gavgpool_ukernel_up7__neon() local
      83  int32x4_t vacc_lo = vaddw_s16(vbias, vget_low_s16(vsum));  in xnn_q8_gavgpool_ukernel_up7__neon()
      84  int32x4_t vacc_hi = vaddw_s16(vbias, vget_high_s16(vsum));  in xnn_q8_gavgpool_ukernel_up7__neon()
     153  int32x4_t vacc_lo = vaddw_s16(vbias, vget_low_s16(vsum));  in xnn_q8_gavgpool_ukernel_up7__neon()
     154  int32x4_t vacc_hi = vaddw_s16(vbias, vget_high_s16(vsum));  in xnn_q8_gavgpool_ukernel_up7__neon()
|
D | up7-scalar.c |
      51  const int32_t vbias = params->scalar.bias;  in xnn_q8_gavgpool_ukernel_up7__scalar() local
      75  const int32_t vacc = vbias + (int32_t) vsum;  in xnn_q8_gavgpool_ukernel_up7__scalar()
|
D | mp7p7q-scalar.c |
      36  const int32_t vbias = params->scalar.bias;  in xnn_q8_gavgpool_ukernel_mp7p7q__scalar() local
      57  const int32_t vacc = vbias + (int32_t) vsum;  in xnn_q8_gavgpool_ukernel_mp7p7q__scalar()
|
D | mp7p7q-neon.c |
      40  const int32x4_t vbias = vld1q_dup_s32(&params->neon.bias);  in xnn_q8_gavgpool_ukernel_mp7p7q__neon() local
      61  const int32x4_t vacc_lo = vaddw_s16(vbias, vget_low_s16(vsum));  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
      62  const int32x4_t vacc_hi = vaddw_s16(vbias, vget_high_s16(vsum));  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
|
D | mp7p7q-sse2.c |
      39  const __m128i vbias = _mm_load_si128((const __m128i*) &params->sse2.bias);  in xnn_q8_gavgpool_ukernel_mp7p7q__sse2() local
      68  const __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));  in xnn_q8_gavgpool_ukernel_mp7p7q__sse2()
      69  const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));  in xnn_q8_gavgpool_ukernel_mp7p7q__sse2()
|
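The NEON gavgpool kernels fold the widening and the bias add into a single instruction: vaddw_s16 adds each int16 lane to an int32 lane of the bias vector, so no separate extend step is needed. A sketch of that step (illustrative names):

    #include <arm_neon.h>

    /* Sketch of the NEON widening add in up7-neon.c / mp7p7q-neon.c.
       vsum stands in for the eight int16 row sums; vbias holds four
       copies of the int32 bias, as loaded by vld1q_dup_s32 above. */
    static void gavgpool_bias_widen_sketch(const int16x8_t vsum, const int32x4_t vbias,
                                           int32x4_t* vacc_lo, int32x4_t* vacc_hi) {
      *vacc_lo = vaddw_s16(vbias, vget_low_s16(vsum));   /* lanes 0..3 */
      *vacc_hi = vaddw_s16(vbias, vget_high_s16(vsum));  /* lanes 4..7 */
    }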
/external/XNNPACK/src/f32-dwconv-spchw/ |
D | 3x3p1-sse.c |
      40  const __m128 vbias = _mm_load1_ps(weights);  in xnn_f32_dwconv_spchw_ukernel_3x3p1__sse() local
      70  __m128 vo4567p0 = vbias;  in xnn_f32_dwconv_spchw_ukernel_3x3p1__sse()
     143  __m128 vo4567p0 = vbias;  in xnn_f32_dwconv_spchw_ukernel_3x3p1__sse()
|
D | 3x3s2p1-sse.c |
      41  const __m128 vbias = _mm_load1_ps(weights);  in xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse() local
      59  __m128 vo8ACEp0 = vbias;  in xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse()
     118  __m128 vo8ACEp0 = vbias;  in xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse()
|
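In these spatial depthwise kernels, the bias is not added at the end: _mm_load1_ps splats the single per-channel bias to all four lanes, and that splat seeds each output accumulator (vo4567p0 = vbias), so the bias add costs nothing extra. A sketch under the assumed weight layout (bias first, filter taps after; names illustrative):

    #include <xmmintrin.h>

    /* Sketch of the accumulator setup in 3x3p1-sse.c / 3x3s2p1-sse.c. */
    static __m128 dwconv_acc_init_sketch(const float* weights) {
      const __m128 vbias = _mm_load1_ps(weights);  /* splat weights[0] to all lanes */
      __m128 vo4567p0 = vbias;                     /* accumulator starts at the bias */
      return vo4567p0;                             /* filter taps accumulate on top  */
    }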
/external/XNNPACK/src/f32-dwconv/ |
D | up4x9-aarch64-neonfma-cortex-a55.S |
      57  # Load vbias.lo
      60  # Load vbias.hi
     351  # Load vbias.lo
     354  # Load vbias.hi
|
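Only the assembly comments are visible here; they suggest the bias vector is loaded in two 64-bit halves. A hedged C-intrinsics reading (the split-load interpretation and the Cortex-A55 scheduling rationale are assumptions, not stated in the listing):

    #include <arm_neon.h>

    /* Hedged reading of the "Load vbias.lo" / "Load vbias.hi" comments: the
       128-bit bias vector assembled from two 64-bit loads, a common idiom
       for scheduling on the in-order Cortex-A55. w is a hypothetical pointer
       to the packed bias values. */
    static float32x4_t load_vbias_halves_sketch(const float* w) {
      const float32x2_t vbias_lo = vld1_f32(w);      /* vbias.lo: lanes 0..1 */
      const float32x2_t vbias_hi = vld1_f32(w + 2);  /* vbias.hi: lanes 2..3 */
      return vcombine_f32(vbias_lo, vbias_hi);
    }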