Home
last modified time | relevance | path

Searched refs:ABC (Results 1 – 25 of 721) sorted by relevance

12345678910>>...29

/external/XNNPACK/src/f32-qs8-vcvt/
Davx512skx.c.in10 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
52 __m512 vx${ABC[N:N+4]} = _mm512_loadu_ps(x + ${N * 4});
56 vx${ABC[N:N+4]} = _mm512_mul_ps(vx${ABC[N:N+4]}, vscale);
59 vx${ABC[N:N+4]} = _mm512_min_ps(vx${ABC[N:N+4]}, voutput_max_less_zero_point);
62 const __m512i vacc${ABC[N:N+4]} = _mm512_cvtps_epi32(vx${ABC[N:N+4]});
65 …2i vacc${ABC[N]}${ABC[N+4]}${ABC[N+1]}${ABC[N+5]}${ABC[N+2]}${ABC[N+6]}${ABC[N+3]}${ABC[N+7]} = _m…
68ABC[N]}${ABC[N+4]}${ABC[N+1]}${ABC[N+5]}${ABC[N+2]}${ABC[N+6]}${ABC[N+3]}${ABC[N+7]} = _mm512_adds…
72ABC[N]}${ABC[N+4]}${ABC[N+8]}${ABC[N+12]}${ABC[N+1]}${ABC[N+5]}${ABC[N+9]}${ABC[N+13]}${ABC[N+2]}$…
74ABC[N]}${ABC[N+4]}${ABC[N+2]}${ABC[N+6]}${ABC[N+1]}${ABC[N+5]}${ABC[N+3]}${ABC[N+7]} = ${_MM256_PA…
78ABC[N]}${ABC[N+4]}${ABC[N+8]}${ABC[N+12]}${ABC[N+1]}${ABC[N+5]}${ABC[N+9]}${ABC[N+13]}${ABC[N+2]}$…
[all …]
/external/XNNPACK/src/qs8-vadd/
Dsse-mul16-ld64.c.in12 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
47 const __m128i va${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) input_a));
48 const __m128i vb${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) input_b));
50 …const __m128i va${ABC[N:N+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (input_a + $…
51 …const __m128i vb${ABC[N:N+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (input_b + $…
53 __m128i va${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) input_a);
54 __m128i vb${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) input_b);
56 __m128i va${ABC[N:N+8]} = _mm_loadl_epi64((const __m128i*) (input_a + ${N}));
57 __m128i vb${ABC[N:N+8]} = _mm_loadl_epi64((const __m128i*) (input_b + ${N}));
65 va${ABC[N:N+8]} = _mm_unpacklo_epi8(va${ABC[N:N+8]}, vzero);
[all …]
Davx2-mul32-ld64.c.in9 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
42 … const __m256i va${ABC[0:8]} = ${_MM256_CVTEPX8_EPI32}(_mm_loadl_epi64((const __m128i*) input_a));
43 … const __m256i vb${ABC[0:8]} = ${_MM256_CVTEPX8_EPI32}(_mm_loadl_epi64((const __m128i*) input_b));
45 …const __m256i va${ABC[N:N+8]} = ${_MM256_CVTEPX8_EPI32}(_mm_loadl_epi64((const __m128i*) (input_a …
46 …const __m256i vb${ABC[N:N+8]} = ${_MM256_CVTEPX8_EPI32}(_mm_loadl_epi64((const __m128i*) (input_b …
51 …__m256i vacc${ABC[N:N+8]} = _mm256_add_epi32(vbias, _mm256_mullo_epi32(va${ABC[N:N+8]}, va_multipl…
54 …vacc${ABC[N:N+8]} = _mm256_add_epi32(vacc${ABC[N:N+8]}, _mm256_mullo_epi32(vb${ABC[N:N+8]}, vb_mul…
57 vacc${ABC[N:N+8]} = _mm256_sra_epi32(vacc${ABC[N:N+8]}, vshift);
61 …_m256i vout${ABC[N:N+4]}${ABC[N+8:N+12]}${ABC[N+4:N+8]}${ABC[N+12:N+16]} = _mm256_adds_epi16(_mm25…
63 …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[N:N+8]…
[all …]
Dsse-mul32-ld32.c.in11 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
52 …const __m128i va${ABC[0:4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(inpu…
53 …const __m128i vb${ABC[0:4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(inpu…
55 …const __m128i va${ABC[N:N+4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(in…
56 …const __m128i vb${ABC[N:N+4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(in…
62 __m128i vacc${ABC[N:N+4]} = _mm_macc_epi32(va${ABC[N:N+4]}, va_multiplier, vbias);
65 vacc${ABC[N:N+4]} = _mm_macc_epi32(vb${ABC[N:N+4]}, vb_multiplier, vacc${ABC[N:N+4]});
68 … __m128i vacc${ABC[N:N+4]} = _mm_add_epi32(vbias, _mm_mullo_epi32(va${ABC[N:N+4]}, va_multiplier));
71 …vacc${ABC[N:N+4]} = _mm_add_epi32(vacc${ABC[N:N+4]}, _mm_mullo_epi32(vb${ABC[N:N+4]}, vb_multiplie…
74 vacc${ABC[N:N+4]} = _mm_sra_epi32(vacc${ABC[N:N+4]}, vshift);
[all …]
Dwasmsimd.c.in9 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
40 const v128_t va${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(input_a);
41 const v128_t vb${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(input_b);
43 const v128_t va${ABC[N:N+8]} = ${WASM_X16X8_LOAD8X8}(input_a + ${N});
44 const v128_t vb${ABC[N:N+8]} = ${WASM_X16X8_LOAD8X8}(input_b + ${N});
49 …v128_t vacc${ABC[N:N+4]} = wasm_i32x4_add(vbias, wasm_i32x4_mul(${WASM_X32X4_EXTEND_LOW_X16X8}(va$…
50 …v128_t vacc${ABC[N+4:N+8]} = wasm_i32x4_add(vbias, wasm_i32x4_mul(${WASM_X32X4_EXTEND_HIGH_X16X8}(…
53 …vacc${ABC[N:N+4]} = wasm_i32x4_add(vacc${ABC[N:N+4]}, wasm_i32x4_mul(${WASM_X32X4_EXTEND_LOW_X16X8…
54 …vacc${ABC[N+4:N+8]} = wasm_i32x4_add(vacc${ABC[N+4:N+8]}, wasm_i32x4_mul(${WASM_X32X4_EXTEND_HIGH_…
57 vacc${ABC[N:N+4]} = wasm_i32x4_shr(vacc${ABC[N:N+4]}, vshift);
[all …]
Davx512skx-mul32-ld128.c.in9 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
47 … const __m512i va${ABC[0:16]} = ${_MM512_CVTEPX8_EPI32}(_mm_loadu_si128((const __m128i*) input_a));
48 … const __m512i vb${ABC[0:16]} = ${_MM512_CVTEPX8_EPI32}(_mm_loadu_si128((const __m128i*) input_b));
50 …const __m512i va${ABC[N:N+16]} = ${_MM512_CVTEPX8_EPI32}(_mm_loadu_si128((const __m128i*) (input_a…
51 …const __m512i vb${ABC[N:N+16]} = ${_MM512_CVTEPX8_EPI32}(_mm_loadu_si128((const __m128i*) (input_b…
56 …__m512i vacc${ABC[N:N+16]} = _mm512_add_epi32(vbias, _mm512_mullo_epi32(va${ABC[N:N+16]}, va_multi…
59 …vacc${ABC[N:N+16]} = _mm512_add_epi32(vacc${ABC[N:N+16]}, _mm512_mullo_epi32(vb${ABC[N:N+16]}, vb_…
62 vacc${ABC[N:N+16]} = _mm512_sra_epi32(vacc${ABC[N:N+16]}, vshift);
66ABC[N:N+4]}${ABC[N+16:N+20]}${ABC[N+4:N+8]}${ABC[N+20:N+24]}${ABC[N+8:N+12]}${ABC[N+24:N+28]}${ABC
68ABC[N:N+4]}${ABC[N+8:N+12]}${ABC[N+4:N+8]}${ABC[N+12:N+16]} = _mm256_adds_epi16(_mm256_packs_epi32…
[all …]
/external/XNNPACK/src/qs8-vmul/
Dsse-mul16-ld64.c.in13 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
46 const __m128i va${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) input_a));
47 const __m128i vb${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) input_b));
49 …const __m128i va${ABC[N:N+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (input_a + $…
50 …const __m128i vb${ABC[N:N+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (input_b + $…
52 __m128i va${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) input_a);
53 __m128i vb${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) input_b);
55 __m128i va${ABC[N:N+8]} = _mm_loadl_epi64((const __m128i*) (input_a + ${N}));
56 __m128i vb${ABC[N:N+8]} = _mm_loadl_epi64((const __m128i*) (input_b + ${N}));
64 va${ABC[N:N+8]} = _mm_unpacklo_epi8(va${ABC[N:N+8]}, vzero);
[all …]
Dwasmsimd-mul32-ld64.c.in10 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
42 const v128_t va${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(input_a);
43 const v128_t vb${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(input_b);
45 const v128_t va${ABC[N:N+8]} = ${WASM_X16X8_LOAD8X8}(input_a + ${N});
46 const v128_t vb${ABC[N:N+8]} = ${WASM_X16X8_LOAD8X8}(input_b + ${N});
51 const v128_t vxa${ABC[N:N+8]} = wasm_i16x8_sub(va${ABC[N:N+8]}, va_zero_point);
52 const v128_t vxb${ABC[N:N+8]} = wasm_i16x8_sub(vb${ABC[N:N+8]}, vb_zero_point);
55 …v128_t vacc${ABC[N:N+4]} = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(vxa${ABC[N:N+8]}), wasm_i32x…
56 …v128_t vacc${ABC[N+4:N+8]} = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(vxa${ABC[N:N+8]}), wasm_i…
59 vacc${ABC[N:N+4]} = wasm_f32x4_convert_i32x4(vacc${ABC[N:N+4]});
[all …]
Dneon.c.in10 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
82 const ${XINT8X16_T} va${ABC[N:N+16]} = ${VLD1Q_X8}(input_a); input_a += 16;
83 const ${XINT8X16_T} vb${ABC[N:N+16]} = ${VLD1Q_X8}(input_b); input_b += 16;
88 …const int16x8_t vxa${ABC[N:N+8]} = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va${ABC[N:N+16]}), v…
89 …const int16x8_t vxa${ABC[N+8:N+16]} = vreinterpretq_s16_u16(vsubl_high_u8(va${ABC[N:N+16]}, va_zer…
90 …const int16x8_t vxb${ABC[N:N+8]} = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(vb${ABC[N:N+16]}), v…
91 …const int16x8_t vxb${ABC[N+8:N+16]} = vreinterpretq_s16_u16(vsubl_high_u8(vb${ABC[N:N+16]}, vb_zer…
93 …const int16x8_t vxa${ABC[N:N+8]} = vsubl_s8(vget_low_s8(va${ABC[N:N+16]}), vget_low_s8(va_zero_poi…
94 const int16x8_t vxa${ABC[N+8:N+16]} = vsubl_high_s8(va${ABC[N:N+16]}, va_zero_point);
95 …const int16x8_t vxb${ABC[N:N+8]} = vsubl_s8(vget_low_s8(vb${ABC[N:N+16]}), vget_low_s8(vb_zero_poi…
[all …]
/external/XNNPACK/src/qs8-gavgpool/
Dmultipass-wasmsimd.c.in13 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
54 const v128_t vxi${M}x${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(i${M});
56 const v128_t vxi${M}x${ABC[C:C+8]} = ${WASM_X16X8_LOAD8X8}(i${M} + ${C});
59 v128_t vacc${ABC[0:8]} = wasm_i16x8_add(vxi0x${ABC[0:8]}, vxi1x${ABC[0:8]});
60 const v128_t vxi2x${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(i2);
62 v128_t vacc${ABC[C:C+8]} = wasm_i16x8_add(vxi0x${ABC[C:C+8]}, vxi1x${ABC[C:C+8]});
63 const v128_t vxi2x${ABC[C:C+8]} = ${WASM_X16X8_LOAD8X8}(i2 + ${C});
67 vacc${ABC[0:8]} = wasm_i16x8_add(vacc${ABC[0:8]}, vxi${M-1}x${ABC[0:8]});
68 const v128_t vxi${M}x${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(i${M});
70 vacc${ABC[C:C+8]} = wasm_i16x8_add(vacc${ABC[C:C+8]}, vxi${M-1}x${ABC[C:C+8]});
[all …]
Dmultipass-sse2.c.in13 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
56 … __m128i vacc${ABC[C:C+8]} = _mm_add_epi16(vxi${M-3}x${ABC[C:C+8]}, vxi${M-2}x${ABC[C:C+8]});
58 vacc${ABC[C:C+8]} = _mm_add_epi16(vacc${ABC[C:C+8]}, vxi${M-2}x${ABC[C:C+8]});
61 …const __m128i vxi${M-1}x${ABC[C:C+8]} = _mm_srai_epi16(_mm_unpacklo_epi8(vi${M-1}x${ABC[C:C+8]}, v…
63 … const __m128i vxi${M-1}x${ABC[C:C+8]} = _mm_unpacklo_epi8(vi${M-1}x${ABC[C:C+8]}, vzero);
66 const __m128i vi${M}x${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) i${M});
68 const __m128i vi${M}x${ABC[C:C+8]} = _mm_loadl_epi64((const __m128i*) (i${M} + ${C}));
74 … const __m128i vsgnacc${ABC[C:C+8]} = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc${ABC[C:C+8]});
75 __m128i vacc${ABC[C:C+4]} = _mm_unpacklo_epi16(vacc${ABC[C:C+8]}, vsgnacc${ABC[C:C+8]});
76 __m128i vacc${ABC[C+4:C+8]} = _mm_unpackhi_epi16(vacc${ABC[C:C+8]}, vsgnacc${ABC[C:C+8]});
[all …]
Dmultipass-sse4.c.in13 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
54 … const __m128i vxi${M}x${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i${M}));
56 …const __m128i vxi${M}x${ABC[C:C+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (i${M}…
59 __m128i vacc${ABC[0:8]} = _mm_add_epi16(vxi0x${ABC[0:8]}, vxi1x${ABC[0:8]});
60 const __m128i vxi2x${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i2));
62 __m128i vacc${ABC[C:C+8]} = _mm_add_epi16(vxi0x${ABC[C:C+8]}, vxi1x${ABC[C:C+8]});
63 …const __m128i vxi2x${ABC[C:C+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (i2 + ${C…
67 vacc${ABC[0:8]} = _mm_add_epi16(vacc${ABC[0:8]}, vxi${M-1}x${ABC[0:8]});
68 … const __m128i vxi${M}x${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i${M}));
70 vacc${ABC[C:C+8]} = _mm_add_epi16(vacc${ABC[C:C+8]}, vxi${M-1}x${ABC[C:C+8]});
[all …]
Dunipass-sse2.c.in11 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
59 … __m128i vacc${ABC[C:C+8]} = _mm_add_epi16(vxi${M-3}x${ABC[C:C+8]}, vxi${M-2}x${ABC[C:C+8]});
61 vacc${ABC[C:C+8]} = _mm_add_epi16(vacc${ABC[C:C+8]}, vxi${M-2}x${ABC[C:C+8]});
64 …const __m128i vxi${M-1}x${ABC[C:C+8]} = _mm_srai_epi16(_mm_unpacklo_epi8(vi${M-1}x${ABC[C:C+8]}, v…
66 … const __m128i vxi${M-1}x${ABC[C:C+8]} = _mm_unpacklo_epi8(vi${M-1}x${ABC[C:C+8]}, vzero);
69 const __m128i vi${M}x${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) i${M});
71 const __m128i vi${M}x${ABC[C:C+8]} = _mm_loadl_epi64((const __m128i*) (i${M} + ${C}));
77 … const __m128i vsgnacc${ABC[C:C+8]} = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc${ABC[C:C+8]});
78 __m128i vacc${ABC[C:C+4]} = _mm_unpacklo_epi16(vacc${ABC[C:C+8]}, vsgnacc${ABC[C:C+8]});
79 __m128i vacc${ABC[C+4:C+8]} = _mm_unpackhi_epi16(vacc${ABC[C:C+8]}, vsgnacc${ABC[C:C+8]});
[all …]
Dunipass-sse4.c.in11 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
57 … const __m128i vxi${M}x${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i${M}));
59 …const __m128i vxi${M}x${ABC[C:C+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (i${M}…
62 __m128i vacc${ABC[0:8]} = _mm_add_epi16(vxi0x${ABC[0:8]}, vxi1x${ABC[0:8]});
63 const __m128i vxi2x${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i2));
65 __m128i vacc${ABC[C:C+8]} = _mm_add_epi16(vxi0x${ABC[C:C+8]}, vxi1x${ABC[C:C+8]});
66 …const __m128i vxi2x${ABC[C:C+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (i2 + ${C…
70 vacc${ABC[0:8]} = _mm_add_epi16(vacc${ABC[0:8]}, vxi${M-1}x${ABC[0:8]});
71 … const __m128i vxi${M}x${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i${M}));
73 vacc${ABC[C:C+8]} = _mm_add_epi16(vacc${ABC[C:C+8]}, vxi${M-1}x${ABC[C:C+8]});
[all …]
Dunipass-wasmsimd.c.in11 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
58 const v128_t vxi${M}x${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(i${M});
60 const v128_t vxi${M}x${ABC[C:C+8]} = ${WASM_X16X8_LOAD8X8}(i${M} + ${C});
63 v128_t vacc${ABC[0:8]} = wasm_i16x8_add(vxi0x${ABC[0:8]}, vxi1x${ABC[0:8]});
64 const v128_t vxi2x${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(i2);
66 v128_t vacc${ABC[C:C+8]} = wasm_i16x8_add(vxi0x${ABC[C:C+8]}, vxi1x${ABC[C:C+8]});
67 const v128_t vxi2x${ABC[C:C+8]} = ${WASM_X16X8_LOAD8X8}(i2 + ${C});
71 vacc${ABC[0:8]} = wasm_i16x8_add(vacc${ABC[0:8]}, vxi${M-1}x${ABC[0:8]});
72 const v128_t vxi${M}x${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(i${M});
74 vacc${ABC[C:C+8]} = wasm_i16x8_add(vacc${ABC[C:C+8]}, vxi${M-1}x${ABC[C:C+8]});
[all …]
Dmultipass-neon.c.in13 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
76 const ${XINT8X8_T} vi${M}x${ABC[C:C+8]} = ${VLD1_X8}(i${M}); i${M} += 8;
79 const ${XINT8X8_T} vi2x${ABC[C:C+8]} = ${VLD1_X8}(i2); i2 += 8;
80 ${XINT16X8_T} vsum${ABC[C:C+8]} = ${VADDL_X8}(vi0x${ABC[C:C+8]}, vi1x${ABC[C:C+8]});
85 const ${XINT8X8_T} vi${M+1}x${ABC[C:C+8]} = ${VLD1_X8}(i${M+1}); i${M+1} += 8;
86 vsum${ABC[C:C+8]} = ${VADDW_X8}(vsum${ABC[C:C+8]}, vi${M}x${ABC[C:C+8]});
90 const int32x4_t vacc${ABC[C:C+4]} = vaddw_s16(vinit_bias, vget_low_s16(vsum${ABC[C:C+8]}));
91 … const int32x4_t vacc${ABC[C+4:C+8]} = vaddw_s16(vinit_bias, vget_high_s16(vsum${ABC[C:C+8]}));
93 …const int32x4_t vacc${ABC[C:C+4]} = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vinit_bi…
94 …const int32x4_t vacc${ABC[C+4:C+8]} = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vinit_…
[all …]
/external/XNNPACK/src/qs8-dwconv/
Dunipass-avx2-mul16-vpunpck.c.in6 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
49 __m256i vacc${ABC[0:8]} = _mm256_loadu_si256((const __m256i*) w);
51 …__m256i vacc${ABC[C:C+8]} = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + ${C} * sizeof(int…
54 …__m256i vacc${ABC[C:C+4]}${ABC[C+8:C+12]} = _mm256_inserti128_si256(vacc${ABC[C:C+8]}, _mm256_cast…
55 …__m256i vacc${ABC[C+4:C+8]}${ABC[C+12:C+16]} = _mm256_permute2x128_si256(vacc${ABC[C:C+8]}, vacc${
61 … const __m256i vi${K}x${ABC[0:16]} = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i${K}));
63 …const __m256i vi${K}x${ABC[C:C+16]} = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i${K}…
64 …const __m256i vk${K}x${ABC[C:C+16]} = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uint…
70 … __m256i vacc${ABC[C:C+16]} = _mm256_mullo_epi16(vi${K}x${ABC[C:C+16]}, vk${K}x${ABC[C:C+16]});
72 vacc${ABC[C:C+16]} = _mm256_mullo_epi16(vi${K}x${ABC[C:C+16]}, vk${K}x${ABC[C:C+16]});
[all …]
Dunipass-sse-mul16.c.in11 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
65 __m128i vacc${ABC[0:4]} = _mm_loadu_si128((const __m128i*) w);
67 __m128i vacc${ABC[C:C+4]} = _mm_loadu_si128((const __m128i*) ((const int32_t*) w + ${C}));
73 const __m128i vi${K}x${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) i${K});
75 const __m128i vi${K}x${ABC[C:C+8]} = _mm_loadl_epi64((const __m128i*) (i${K} + ${C}));
78 const __m128i vxi${K}x${ABC[C:C+8]} = _mm_cvtepu8_epi16(vi${K}x${ABC[C:C+8]});
80 const __m128i vxi${K}x${ABC[C:C+8]} = _mm_cvtepi8_epi16(vi${K}x${ABC[C:C+8]});
81 …const __m128i vk${K}x${ABC[C:C+8]} = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + ${CHANNEL_T…
84 …const __m128i vxk${K}x${ABC[C:C+8]} = _mm_sub_epi16(_mm_cvtepu8_epi16(vk${K}x${ABC[C:C+8]}), vk_ze…
86 const __m128i vxk${K}x${ABC[C:C+8]} = _mm_cvtepi8_epi16(vk${K}x${ABC[C:C+8]});
[all …]
Dunipass-avx512skx-mul32.c.in6 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
70 __m512i vacc${ABC[0:16]} = _mm512_loadu_si512(w);
72 …__m512i vacc${ABC[C:C+16]} = _mm512_loadu_si512((const void*) ((uintptr_t) w + ${C} * sizeof(int32…
78 …const __m512i vi${K}x${ABC[0:16]} = ${_MM512_CVTEPX8_EPI32}(_mm_loadu_si128((const __m128i*) i${K}…
80 …const __m512i vi${K}x${ABC[C:C+16]} = ${_MM512_CVTEPX8_EPI32}(_mm_loadu_si128((const __m128i*) (i$…
82 …const __m512i vk${K}x${ABC[C:C+16]} = _mm512_sub_epi32(_mm512_cvtepu8_epi32(_mm_load_si128((const …
84 …const __m512i vk${K}x${ABC[C:C+16]} = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintp…
88 …vacc${ABC[C:C+16]} = _mm512_add_epi32(vacc${ABC[C:C+16]}, _mm512_mullo_epi32(vi${K}x${ABC[C:C+16]}…
93 __m512 vscaled${ABC[C:C+16]} = _mm512_cvtepi32_ps(vacc${ABC[C:C+16]});
96 const __m512 vscale${ABC[0:16]} = _mm512_loadu_ps(w);
[all …]
Dunipass-wasmsimd-mul16.c.in6 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
57 v128_t vacc${ABC[0:4]} = wasm_v128_load(w);
59 … v128_t vacc${ABC[C:C+4]} = wasm_v128_load((const void*) ((uintptr_t) w + ${C} * sizeof(int32_t)));
65 const v128_t vi${K}x${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(i${K});
67 const v128_t vi${K}x${ABC[C:C+8]} = ${WASM_X16X8_LOAD8X8}(i${K} + ${C});
68 …const v128_t vk${K}x${ABC[C:C+8]} = ${WASM_X16X8_LOAD8X8}((const void*) ((uintptr_t) w + ${CHANNEL…
72 v128_t vsumx${ABC[C:C+8]} = wasm_i16x8_add(vi0x${ABC[C:C+8]}, vi1x${ABC[C:C+8]});
74 vsumx${ABC[C:C+8]} = wasm_i16x8_add(vsumx${ABC[C:C+8]}, vi${K}x${ABC[C:C+8]});
79 v128_t vprod${ABC[C:C+8]} = wasm_i16x8_mul(vi${K}x${ABC[C:C+8]}, vk${K}x${ABC[C:C+8]});
81 vprod${ABC[C:C+8]} = wasm_i16x8_mul(vi${K}x${ABC[C:C+8]}, vk${K}x${ABC[C:C+8]});
[all …]
Dunipass-neon-mul8.c.in9 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
72 int32x4_t vacc${ABC[C:C+4]} = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4);
77 const int8x16_t vi${K}x${ABC[C:C+16]} = vld1q_s8(i${K}); i${K} += 16;
78 … const int8x16_t vk${K}x${ABC[C:C+16]} = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);
82 …int16x8_t vprod${ABC[C:C+8]} = vmull_s8(vget_low_s8(vi${K}x${ABC[C:C+16]}), vget_low_s8(vk${K}x${A…
83 …int16x8_t vprod${ABC[C+8:C+16]} = vmull_s8(vget_high_s8(vi${K}x${ABC[C:C+16]}), vget_high_s8(vk${K…
86 …vprod${ABC[C:C+8]} = vmull_s8(vget_low_s8(vi${K}x${ABC[C:C+16]}), vget_low_s8(vk${K}x${ABC[C:C+16]…
87 …vprod${ABC[C+8:C+16]} = vmull_s8(vget_high_s8(vi${K}x${ABC[C:C+16]}), vget_high_s8(vk${K}x${ABC[C:…
90 …vprod${ABC[C:C+8]} = vmlal_s8(vprod${ABC[C:C+8]}, vget_low_s8(vi${K}x${ABC[C:C+16]}), vget_low_s8(…
91 …vprod${ABC[C+8:C+16]} = vmlal_s8(vprod${ABC[C+8:C+16]}, vget_high_s8(vi${K}x${ABC[C:C+16]}), vget_…
[all …]
/external/XNNPACK/src/qs8-vmulc/
Dsse-mul16-ld64.c.in13 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
48 const __m128i va${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) input_a));
50 …const __m128i va${ABC[N:N+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (input_a + $…
52 __m128i va${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) input_a);
54 __m128i va${ABC[N:N+8]} = _mm_loadl_epi64((const __m128i*) (input_a + ${N}));
61 va${ABC[N:N+8]} = _mm_unpacklo_epi8(va${ABC[N:N+8]}, vzero);
64 va${ABC[N:N+8]} = _mm_srai_epi16(_mm_unpacklo_epi8(va${ABC[N:N+8]}, va${ABC[N:N+8]}), 8);
67 const __m128i vxa${ABC[N:N+8]} = _mm_sub_epi16(va${ABC[N:N+8]}, va_zero_point);
70 const __m128i vprod${ABC[N:N+8]}lo = _mm_mullo_epi16(vxa${ABC[N:N+8]}, vxb);
71 const __m128i vprod${ABC[N:N+8]}hi = _mm_mulhi_epi16(vxa${ABC[N:N+8]}, vxb);
[all …]
Dwasmsimd-mul32-ld64.c.in10 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
45 const v128_t va${ABC[0:8]} = ${WASM_X16X8_LOAD8X8}(input_a);
47 const v128_t va${ABC[N:N+8]} = ${WASM_X16X8_LOAD8X8}(input_a + ${N});
51 const v128_t vxa${ABC[N:N+8]} = wasm_i16x8_sub(va${ABC[N:N+8]}, va_zero_point);
54 … v128_t vacc${ABC[N:N+4]} = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(vxa${ABC[N:N+8]}), vxblo);
55 …v128_t vacc${ABC[N+4:N+8]} = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(vxa${ABC[N:N+8]}), vxbhi);
58 vacc${ABC[N:N+4]} = wasm_f32x4_convert_i32x4(vacc${ABC[N:N+4]});
61 vacc${ABC[N:N+4]} = wasm_f32x4_mul(vacc${ABC[N:N+4]}, vscale);
64 vacc${ABC[N:N+4]} = wasm_f32x4_add(vacc${ABC[N:N+4]}, vmagic_bias);
67 vacc${ABC[N:N+4]} = wasm_i32x4_max(vacc${ABC[N:N+4]}, vmagic_min);
[all …]
/external/XNNPACK/src/qs8-vaddc/
Dsse-mul16-ld64.c.in12 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
47 const __m128i va${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) input_a));
49 …const __m128i va${ABC[N:N+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (input_a + $…
51 __m128i va${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) input_a);
53 __m128i va${ABC[N:N+8]} = _mm_loadl_epi64((const __m128i*) (input_a + ${N}));
60 va${ABC[N:N+8]} = _mm_unpacklo_epi8(va${ABC[N:N+8]}, vzero);
63 va${ABC[N:N+8]} = _mm_srai_epi16(_mm_unpacklo_epi8(va${ABC[N:N+8]}, va${ABC[N:N+8]}), 8);
66 __m128i vaprod${ABC[N:N+8]}hi = _mm_mulhi_epu16(va${ABC[N:N+8]}, va_multiplier_lo);
67 const __m128i vaprod${ABC[N:N+8]}lo = _mm_mullo_epi16(va${ABC[N:N+8]}, va_multiplier_lo);
70 …vaprod${ABC[N:N+8]}hi = _mm_add_epi16(vaprod${ABC[N:N+8]}hi, _mm_mullo_epi16(va${ABC[N:N+8]}, va_m…
[all …]
Davx2-mul32-ld64.c.in9 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
43 … const __m256i va${ABC[0:8]} = ${_MM256_CVTEPX8_EPI32}(_mm_loadl_epi64((const __m128i*) input_a));
45 …const __m256i va${ABC[N:N+8]} = ${_MM256_CVTEPX8_EPI32}(_mm_loadl_epi64((const __m128i*) (input_a …
49 …__m256i vacc${ABC[N:N+8]} = _mm256_add_epi32(vbias, _mm256_mullo_epi32(va${ABC[N:N+8]}, va_multipl…
52 vacc${ABC[N:N+8]} = _mm256_sra_epi32(vacc${ABC[N:N+8]}, vshift);
56 …_m256i vout${ABC[N:N+4]}${ABC[N+8:N+12]}${ABC[N+4:N+8]}${ABC[N+12:N+16]} = _mm256_adds_epi16(_mm25…
58 …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[N:N+8]…
60 …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[N:N+8]…
64ABC[N:N+16]} = _mm_shuffle_epi32(${_MM_PACKXS_EPI16}(_mm256_castsi256_si128(vout${ABC[N:N+4]}${ABC
66 …__m128i vout${ABC[N:N+8]}${ABC[N:N+8]} = ${_MM_PACKXS_EPI16}(vout${ABC[N:N+8]}, vout${ABC[N:N+8]});
[all …]

12345678910>>...29