/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8conv/ |
D | 4x8-neon.c | 71 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local 95 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local 119 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local 143 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local 167 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local 191 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local 215 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local 239 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local 283 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local 307 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_4x8__neon() local [all …]
|
D | 8x8-neon.c | 97 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local 137 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local 177 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local 217 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local 257 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local 297 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local 337 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local 377 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local 453 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local 493 const uint8x8_t vb01234567 = vld1_u8(w); in pytorch_q8conv_ukernel_8x8__neon() local [all …]
|
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/hgemm/ |
D | 8x8-neonfp16arith.c | 83 const float16x8_t vb01234567 = vld1q_f16(w); in pytorch_hgemm_ukernel_8x8__neonfp16arith() local 97 const float16x8_t vb01234567 = vld1q_f16(w); in pytorch_hgemm_ukernel_8x8__neonfp16arith() local 111 const float16x8_t vb01234567 = vld1q_f16(w); in pytorch_hgemm_ukernel_8x8__neonfp16arith() local 125 const float16x8_t vb01234567 = vld1q_f16(w); in pytorch_hgemm_ukernel_8x8__neonfp16arith() local 159 const float16x8_t vb01234567 = vld1q_f16(w); in pytorch_hgemm_ukernel_8x8__neonfp16arith() local 173 const float16x8_t vb01234567 = vld1q_f16(w); in pytorch_hgemm_ukernel_8x8__neonfp16arith() local 186 const float16x8_t vb01234567 = vld1q_f16(w); in pytorch_hgemm_ukernel_8x8__neonfp16arith() local 199 const float16x8_t vb01234567 = vld1q_f16(w); in pytorch_hgemm_ukernel_8x8__neonfp16arith() local
|
/external/XNNPACK/src/f16-vbinary/gen/ |
D | vmin-neonfp16arith-x16.c | 38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmin_ukernel__neonfp16arith_x16() local 52 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmin_ukernel__neonfp16arith_x16() local 59 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmin_ukernel__neonfp16arith_x16() local
|
D | vsqrdiff-neonfp16arith-x16.c | 38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16() local 54 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16() local 62 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16() local
|
D | vmax-neonfp16arith-x16.c | 38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmax_ukernel__neonfp16arith_x16() local 52 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmax_ukernel__neonfp16arith_x16() local 59 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmax_ukernel__neonfp16arith_x16() local
|
D | vadd-minmax-neonfp16arith-x16.c | 40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16() local 59 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16() local 68 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16() local
|
D | vmul-minmax-neonfp16arith-x16.c | 40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16() local 59 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16() local 68 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16() local
|
D | vsub-minmax-neonfp16arith-x16.c | 40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16() local 59 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16() local 68 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16() local
|
D | vdiv-minmax-neonfp16arith-x16.c | 40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x16() local 59 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x16() local 68 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x16() local
|
D | vsqrdiff-neonfp16arith-x8.c | 38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x8() local 46 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vsqrdiff_ukernel__neonfp16arith_x8() local
|
D | vmax-neonfp16arith-x8.c | 38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmax_ukernel__neonfp16arith_x8() local 45 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmax_ukernel__neonfp16arith_x8() local
|
D | vmin-neonfp16arith-x8.c | 38 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmin_ukernel__neonfp16arith_x8() local 45 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmin_ukernel__neonfp16arith_x8() local
|
D | vdiv-minmax-neonfp16arith-x8.c | 40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x8() local 49 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vdiv_minmax_ukernel__neonfp16arith_x8() local
|
D | vadd-minmax-neonfp16arith-x8.c | 40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x8() local 49 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vadd_minmax_ukernel__neonfp16arith_x8() local
|
D | vmul-minmax-neonfp16arith-x8.c | 40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x8() local 49 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vmul_minmax_ukernel__neonfp16arith_x8() local
|
D | vsub-minmax-neonfp16arith-x8.c | 40 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8() local 49 const float16x8_t vb01234567 = vld1q_f16(b); in xnn_f16_vsub_minmax_ukernel__neonfp16arith_x8() local
|
/external/XNNPACK/src/f16-vbinary/ |
D | vop-neonfp16arith.c.in | 76 const float16x8_t vb01234567 = vld1q_f16(b); b += 8; variable 88 const float16x8_t vb01234567 = vld1q_f16(b); variable
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-avx2-mul32-ld64-x8.c | 35 const __m256i vb01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) input_b)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() local 59 const __m256i vb01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) input_b)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() local
|
D | minmax-avx2-mul32-ld64-x16.c | 35 const __m256i vb01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) input_b)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() local 64 const __m256i vb01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) input_b)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() local
|
D | minmax-wasmsimd-x8.c | 34 const v128_t vb01234567 = wasm_i16x8_load8x8(input_b); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() local 61 const v128_t vb01234567 = wasm_i16x8_load8x8(input_b); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() local
|
/external/XNNPACK/src/qu8-vadd/gen/ |
D | minmax-avx2-mul32-ld64-x8.c | 35 const __m256i vb01234567 = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) input_b)); in xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() local 59 const __m256i vb01234567 = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) input_b)); in xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x8() local
|
D | minmax-avx2-mul32-ld64-x16.c | 35 const __m256i vb01234567 = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) input_b)); in xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() local 64 const __m256i vb01234567 = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) input_b)); in xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16() local
|
D | minmax-wasmsimd-x8.c | 34 const v128_t vb01234567 = wasm_u16x8_load8x8(input_b); in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8() local 61 const v128_t vb01234567 = wasm_u16x8_load8x8(input_b); in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x8() local
|
D | minmax-neon-ld64-x8.c | 35 const uint8x8_t vb01234567 = vld1_u8(input_b); input_b += 8; in xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8() local 62 const uint8x8_t vb01234567 = vld1_u8(input_b); in xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8() local
|