Searched refs:vxb (Results 1 – 12 of 12) sorted by relevance

/external/XNNPACK/src/qu8-vadd/
minmax-sse2.c
40 const __m128i vxb = _mm_unpacklo_epi8(vb, vzero); in xnn_qu8_vadd_minmax_ukernel__sse2() local
47 const __m128i vb_product_lo = _mm_mullo_epi16(vxb, vb_multiplier_lo); in xnn_qu8_vadd_minmax_ukernel__sse2()
49 _mm_add_epi16(_mm_mulhi_epu16(vxb, vb_multiplier_lo), _mm_mullo_epi16(vxb, vb_multiplier_hi)); in xnn_qu8_vadd_minmax_ukernel__sse2()
82 const __m128i vxb = _mm_unpacklo_epi8(vb, vzero); in xnn_qu8_vadd_minmax_ukernel__sse2() local
89 const __m128i vb_product_lo = _mm_mullo_epi16(vxb, vb_multiplier_lo); in xnn_qu8_vadd_minmax_ukernel__sse2()
91 _mm_add_epi16(_mm_mulhi_epu16(vxb, vb_multiplier_lo), _mm_mullo_epi16(vxb, vb_multiplier_hi)); in xnn_qu8_vadd_minmax_ukernel__sse2()
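
The SSE2 hits above zero-extend the uint8 operand and assemble a 32-bit product from 16-bit halves. A minimal standalone sketch of that pattern (not the XNNPACK kernel itself; the helper name widen_and_multiply_lo is hypothetical):

#include <emmintrin.h>

/* vb holds 16 uint8_t lanes (only the low 8 are used); the 32-bit multiplier
 * is passed pre-split into its low and high 16-bit words, as in the kernel. */
static inline void widen_and_multiply_lo(
    __m128i vb,
    __m128i vb_multiplier_lo,
    __m128i vb_multiplier_hi,
    __m128i* vb_product_lo,
    __m128i* vb_product_hi)
{
  const __m128i vzero = _mm_setzero_si128();
  /* Interleaving with zero bytes zero-extends uint8_t -> uint16_t. */
  const __m128i vxb = _mm_unpacklo_epi8(vb, vzero);
  /* Low 16 bits of vxb * multiplier. */
  *vb_product_lo = _mm_mullo_epi16(vxb, vb_multiplier_lo);
  /* Bits 16..31 of vxb * multiplier: the carry out of the low word plus the
   * product with the multiplier's high word. */
  *vb_product_hi = _mm_add_epi16(_mm_mulhi_epu16(vxb, vb_multiplier_lo),
                                 _mm_mullo_epi16(vxb, vb_multiplier_hi));
}
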
minmax-neon.c
156 const int16x8_t vxb = vreinterpretq_s16_u16(vsubl_u8(vb, vb_zero_point)); in xnn_qu8_vadd_minmax_ukernel__neon() local
166 vacc_lo = vmlaq_s32(vacc_lo, vmovl_s16(vget_low_s16(vxb)), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
168 vacc_hi = vmlaq_s32(vacc_hi, vmovl_high_s16(vxb), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
170 vacc_hi = vmlaq_s32(vacc_hi, vmovl_s16(vget_high_s16(vxb)), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
199 const int16x8_t vxb = vreinterpretq_s16_u16(vsubl_u8(vb, vb_zero_point)); in xnn_qu8_vadd_minmax_ukernel__neon() local
209 vacc_lo = vmlaq_s32(vacc_lo, vmovl_s16(vget_low_s16(vxb)), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
211 vacc_hi = vmlaq_s32(vacc_hi, vmovl_high_s16(vxb), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
213 vacc_hi = vmlaq_s32(vacc_hi, vmovl_s16(vget_high_s16(vxb)), vb_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon()
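
The NEON hits subtract the zero point with a widening u8 subtract, reinterpret the result as signed 16-bit, then widen each half and multiply-accumulate. A self-contained sketch under those assumptions (the helper name accumulate_b is illustrative):

#include <arm_neon.h>

static inline void accumulate_b(
    uint8x8_t vb,             /* 8 quantized inputs */
    uint8x8_t vb_zero_point,  /* zero point broadcast to all lanes */
    int32x4_t vb_multiplier,  /* multiplier broadcast to all lanes */
    int32x4_t* vacc_lo,       /* in/out: accumulators for lanes 0..3 */
    int32x4_t* vacc_hi)       /* in/out: accumulators for lanes 4..7 */
{
  /* (uint16) vb - (uint16) zero_point, viewed as int16. */
  const int16x8_t vxb = vreinterpretq_s16_u16(vsubl_u8(vb, vb_zero_point));
  /* Widen each half to 32 bits and multiply-accumulate; the vmovl_high_s16
   * form in the hits is the AArch64-only spelling of the high half. */
  *vacc_lo = vmlaq_s32(*vacc_lo, vmovl_s16(vget_low_s16(vxb)), vb_multiplier);
  *vacc_hi = vmlaq_s32(*vacc_hi, vmovl_s16(vget_high_s16(vxb)), vb_multiplier);
}
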
/external/XNNPACK/src/qs8-igemm/
neon-mlal-lane.c.in
83 const int16x8_t vxb${ABC[N:N+8]}c${K} = vmovl_s8(vb${ABC[N:N+8]}c${K});
86 …vacc${M}x${ABC[N:N+4]} = vmlal_lane_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vxb${ABC[N:N+8]}c${K}…
87 …vacc${M}x${ABC[N+4:N+8]} = vmlal_lane_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vxb${ABC[N:N+8]}…
92 const int16x8_t vxb${ABC[N:N+8]}c${K} = vmovl_s8(vb${ABC[N:N+8]}c${K});
95 …vacc${M}x${ABC[N:N+4]} = vmlal_lane_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vxb${ABC[N:N+8]}c${K}…
96 …vacc${M}x${ABC[N+4:N+8]} = vmlal_lane_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vxb${ABC[N:N+8]}…
107 const int16x8_t vxb${ABC[N:N+8]}c0 = vmovl_s8(vb${ABC[N:N+8]}c0);
111 …vacc${M}x${ABC[N:N+4]} = vmlal_lane_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vxb${ABC[N:N+8]}c0), …
112 …vacc${M}x${ABC[N+4:N+8]} = vmlal_lane_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vxb${ABC[N:N+8]}…
117 const int16x8_t vxb${ABC[N:N+8]}c1 = vmovl_s8(vb${ABC[N:N+8]}c1);
[all …]
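
The template lines above expand to a sign-extend of the int8 weights followed by vmlal_lane_s16 against one broadcast lane of the widened activations. A sketch of one expanded step (variable names are illustrative of the generated code, not taken from it):

#include <arm_neon.h>

static inline void mlal_lane_step(
    int8x8_t vb01234567c0,  /* 8 int8 weights for the first k-step */
    int16x8_t vxa0,         /* sign-extended activations for row 0 */
    int32x4_t* vacc0x0123,  /* in/out: accumulators for columns 0..3 */
    int32x4_t* vacc0x4567)  /* in/out: accumulators for columns 4..7 */
{
  /* Widen the int8 weights to int16. */
  const int16x8_t vxb01234567c0 = vmovl_s8(vb01234567c0);
  /* Multiply by lane 0 of the activations and accumulate into 32-bit sums. */
  *vacc0x0123 = vmlal_lane_s16(*vacc0x0123, vget_low_s16(vxb01234567c0),
                               vget_low_s16(vxa0), 0);
  *vacc0x4567 = vmlal_lane_s16(*vacc0x4567, vget_high_s16(vxb01234567c0),
                               vget_low_s16(vxa0), 0);
}
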
MRx4c8-wasmsimd.c.in
90 const v128_t vxb${N} = wasm_i16x8_widen_low_i8x16(vb${N}${N+1});
91 const v128_t vxb${N+1} = wasm_i16x8_widen_high_i8x16(vb${N}${N+1});
94 const v128_t vprod${M}x${N} = wasm_i16x8_mul(vxb${N}, vxa${M});
98 const v128_t vprod${M}x${N+1} = wasm_i16x8_mul(vxb${N+1}, vxa${M});
108 const v128_t vxb${N} = wasm_i16x8_load_8x8(w);
110 …const v128_t vxb${N} = wasm_i16x8_load_8x8((const void*) ((uintptr_t) w + ${N * 8} * sizeof(int8_t…
113 const v128_t vxb${N} = wasm_v128_load(w);
115 … const v128_t vxb${N} = wasm_v128_load((const void*) ((uintptr_t) w + ${N * 8} * sizeof(int16_t)));
118 const v128_t vprod${M}x${N} = wasm_i16x8_mul(vxa${M}, vxb${N});
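
The wasmsimd template widens the two int8 weight halves to int16 and multiplies them against the widened activations. A sketch using the intrinsic names that appear in the hits (older wasm_simd128.h naming; newer headers spell the widening ops wasm_i16x8_extend_low/high_i8x16; the helper name is illustrative):

#include <wasm_simd128.h>

static inline void widen_and_multiply(
    v128_t vb01,       /* 16 int8 weights covering two column blocks */
    v128_t vxa0,       /* 8 int16 activations for row 0 */
    v128_t* vprod0x0,  /* out: 16-bit products for column block 0 */
    v128_t* vprod0x1)  /* out: 16-bit products for column block 1 */
{
  /* Sign-extend the low and high 8 int8 lanes to int16. */
  const v128_t vxb0 = wasm_i16x8_widen_low_i8x16(vb01);
  const v128_t vxb1 = wasm_i16x8_widen_high_i8x16(vb01);
  /* Lane-wise 16-bit multiply against the activations. */
  *vprod0x0 = wasm_i16x8_mul(vxb0, vxa0);
  *vprod0x1 = wasm_i16x8_mul(vxb1, vxa0);
}
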
MRx4c8-sse.c.in
101 const __m128i vxb${N} = _mm_unpacklo_epi8(vb${N}${N+1}, vsb${N}${N+1});
102 const __m128i vxb${N+1} = _mm_unpackhi_epi8(vb${N}${N+1}, vsb${N}${N+1});
106 vacc${M}x${N} = _mm_maddd_epi16(vxa${M}, vxb${N}, vacc${M}x${N});
107 vacc${M}x${N+1} = _mm_maddd_epi16(vxa${M}, vxb${N+1}, vacc${M}x${N+1});
109 vacc${M}x${N} = _mm_add_epi32(vacc${M}x${N}, _mm_madd_epi16(vxa${M}, vxb${N}));
110 … vacc${M}x${N+1} = _mm_add_epi32(vacc${M}x${N+1}, _mm_madd_epi16(vxa${M}, vxb${N+1}));
118 const __m128i vxb${N} = _mm_cvtepi8_epi16(vb${N});
120 … const __m128i vxb${N} = _mm_unpacklo_epi8(vb${N}, _mm_cmpgt_epi8(_mm_setzero_si128(), vb${N}));
124 vacc${M}x${N} = _mm_maddd_epi16(vxa${M}, vxb${N}, vacc${M}x${N});
126 vacc${M}x${N} = _mm_add_epi32(vacc${M}x${N}, _mm_madd_epi16(vxa${M}, vxb${N}));
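
The c8 SSE template sign-extends the int8 weights (via _mm_cvtepi8_epi16 on SSE4.1, or the unpack-with-sign-mask fallback on SSE2) and accumulates with a pairwise multiply-add; the _mm_maddd_epi16 branch is the XOP variant. A plain SSE2 sketch of one step (the helper name madd_step is hypothetical):

#include <emmintrin.h>

static inline __m128i madd_step(__m128i vacc, __m128i vxa, __m128i vb)
{
  /* Sign-extend 8 int8 weights to int16: interleave with a byte mask of the
   * sign bits, the SSE2 fallback shown in the hits. */
  const __m128i vsb = _mm_cmpgt_epi8(_mm_setzero_si128(), vb);
  const __m128i vxb = _mm_unpacklo_epi8(vb, vsb);
  /* Pairwise 16x16 -> 32-bit multiply-add, then accumulate. */
  return _mm_add_epi32(vacc, _mm_madd_epi16(vxa, vxb));
}
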
MRx4c2-sse.c.in
99 const __m128i vxb${K} = _mm_unpacklo_epi8(vb${K}${K+1}, vsb${K}${K+1});
100 const __m128i vxb${K+1} = _mm_unpackhi_epi8(vb${K}${K+1}, vsb${K}${K+1});
105 … _mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K}, ${K}, ${K}, ${K})), vxb${K}, vacc${M}x0123);
108 … _mm_madd_epi16(_mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K}, ${K}, ${K}, ${K})), vxb${K}));
113 …_mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K+1}, ${K+1}, ${K+1}, ${K+1})), vxb${K+1}, vacc${M}x0123);
116 …mm_madd_epi16(_mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K+1}, ${K+1}, ${K+1}, ${K+1})), vxb${K+1}));
124 const __m128i vxb${K} = _mm_cvtepi8_epi16(vb${K});
126 … const __m128i vxb${K} = _mm_unpacklo_epi8(vb${K}, _mm_cmpgt_epi8(_mm_setzero_si128(), vb${K}));
131 … _mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K}, ${K}, ${K}, ${K})), vxb${K}, vacc${M}x0123);
134 … _mm_madd_epi16(_mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K}, ${K}, ${K}, ${K})), vxb${K}));
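
The c2 variant differs from c8 in that it broadcasts one 32-bit group of the widened activations (two int16 values) with _mm_shuffle_epi32 before each multiply-add. A sketch of the K = 0 step (helper name illustrative):

#include <emmintrin.h>

static inline __m128i madd_c2_step(__m128i vacc0x0123, __m128i vxa0, __m128i vxb0)
{
  /* Broadcast activation group 0 to all four 32-bit groups, then
   * multiply-add against the widened weights and accumulate. */
  return _mm_add_epi32(
      vacc0x0123,
      _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
}
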
MRx8c8-avx2.c.in
92 const __m256i vxb${N}${N+1} = _mm256_cvtepi8_epi16(vb${N}${N+1});
95 …${M}x${N}${N+1} = _mm256_add_epi32(vacc${M}x${N}${N+1}, _mm256_madd_epi16(vxa${M}, vxb${N}${N+1}));
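
The AVX2 template performs the same widen-then-madd step across a 256-bit register. A minimal sketch (the helper name madd_avx2_step is hypothetical):

#include <immintrin.h>

static inline __m256i madd_avx2_step(__m256i vacc, __m256i vxa, __m128i vb)
{
  /* Sign-extend 16 int8 weight lanes to int16. */
  const __m256i vxb = _mm256_cvtepi8_epi16(vb);
  /* Pairwise 16x16 -> 32-bit multiply-add, then accumulate. */
  return _mm256_add_epi32(vacc, _mm256_madd_epi16(vxa, vxb));
}
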
/external/XNNPACK/src/qs8-gemm/
neon-mlal-lane.c.in
75 const int16x8_t vxb${ABC[N:N+8]}c${K} = vmovl_s8(vb${ABC[N:N+8]}c${K});
78 …vacc${M}x${ABC[N:N+4]} = vmlal_lane_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vxb${ABC[N:N+8]}c${K}…
79 …vacc${M}x${ABC[N+4:N+8]} = vmlal_lane_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vxb${ABC[N:N+8]}…
84 const int16x8_t vxb${ABC[N:N+8]}c${K} = vmovl_s8(vb${ABC[N:N+8]}c${K});
87 …vacc${M}x${ABC[N:N+4]} = vmlal_lane_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vxb${ABC[N:N+8]}c${K}…
88 …vacc${M}x${ABC[N+4:N+8]} = vmlal_lane_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vxb${ABC[N:N+8]}…
99 const int16x8_t vxb${ABC[N:N+8]}c0 = vmovl_s8(vb${ABC[N:N+8]}c0);
103 …vacc${M}x${ABC[N:N+4]} = vmlal_lane_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vxb${ABC[N:N+8]}c0), …
104 …vacc${M}x${ABC[N+4:N+8]} = vmlal_lane_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vxb${ABC[N:N+8]}…
109 const int16x8_t vxb${ABC[N:N+8]}c1 = vmovl_s8(vb${ABC[N:N+8]}c1);
[all …]
MRx4c8-wasmsimd.c.in
82 const v128_t vxb${N} = wasm_i16x8_widen_low_i8x16(vb${N}${N+1});
83 const v128_t vxb${N+1} = wasm_i16x8_widen_high_i8x16(vb${N}${N+1});
86 const v128_t vprod${M}x${N} = wasm_i16x8_mul(vxb${N}, vxa${M});
90 const v128_t vprod${M}x${N+1} = wasm_i16x8_mul(vxb${N+1}, vxa${M});
100 const v128_t vxb${N} = wasm_i16x8_load_8x8(w);
102 …const v128_t vxb${N} = wasm_i16x8_load_8x8((const void*) ((uintptr_t) w + ${N * 8} * sizeof(int8_t…
105 const v128_t vxb${N} = wasm_v128_load(w);
107 … const v128_t vxb${N} = wasm_v128_load((const void*) ((uintptr_t) w + ${N * 8} * sizeof(int16_t)));
110 const v128_t vprod${M}x${N} = wasm_i16x8_mul(vxa${M}, vxb${N});
MRx4c8-sse.c.in
96 const __m128i vxb${N} = _mm_unpacklo_epi8(vb${N}${N+1}, vsb${N}${N+1});
97 const __m128i vxb${N+1} = _mm_unpackhi_epi8(vb${N}${N+1}, vsb${N}${N+1});
101 vacc${M}x${N} = _mm_maddd_epi16(vxa${M}, vxb${N}, vacc${M}x${N});
102 vacc${M}x${N+1} = _mm_maddd_epi16(vxa${M}, vxb${N+1}, vacc${M}x${N+1});
104 vacc${M}x${N} = _mm_add_epi32(vacc${M}x${N}, _mm_madd_epi16(vxa${M}, vxb${N}));
105 vacc${M}x${N+1} = _mm_add_epi32(vacc${M}x${N+1}, _mm_madd_epi16(vxa${M}, vxb${N+1}));
114 const __m128i vxb${N} = _mm_cvtepi8_epi16(vb${N});
116 … const __m128i vxb${N} = _mm_unpacklo_epi8(vb${N}, _mm_cmpgt_epi8(_mm_setzero_si128(), vb${N}));
119 const __m128i vxb${N} = _mm_load_si128((const __m128i*) w);
121 …const __m128i vxb${N} = _mm_load_si128((const __m128i*) ((uintptr_t) w + ${N * 8} * sizeof(int16_t…
[all …]
MRx4c2-sse.c.in
94 const __m128i vxb${K} = _mm_unpacklo_epi8(vb${K}${K+1}, vsb${K}${K+1});
95 const __m128i vxb${K+1} = _mm_unpackhi_epi8(vb${K}${K+1}, vsb${K}${K+1});
100 … _mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K}, ${K}, ${K}, ${K})), vxb${K}, vacc${M}x0123);
103 … _mm_madd_epi16(_mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K}, ${K}, ${K}, ${K})), vxb${K}));
108 …_mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K+1}, ${K+1}, ${K+1}, ${K+1})), vxb${K+1}, vacc${M}x0123);
111 …mm_madd_epi16(_mm_shuffle_epi32(vxa${M}, _MM_SHUFFLE(${K+1}, ${K+1}, ${K+1}, ${K+1})), vxb${K+1}));
120 const __m128i vxb${K} = _mm_cvtepi8_epi16(vb${K});
122 … const __m128i vxb${K} = _mm_unpacklo_epi8(vb${K}, _mm_cmpgt_epi8(_mm_setzero_si128(), vb${K}));
125 const __m128i vxb${K} = _mm_load_si128((const __m128i*) w);
127 …const __m128i vxb${K} = _mm_load_si128((const __m128i*) ((uintptr_t) w + ${K * 8} * sizeof(int16_t…
[all …]
MRx8c8-avx2.c.in
84 const __m256i vxb${N}${N+1} = _mm256_load_si256((const __m256i*) w);
86 …const __m256i vxb${N}${N+1} = _mm256_load_si256((const __m256i*) ((uintptr_t) w + ${N * 8} * sizeo…
92 const __m256i vxb${N}${N+1} = _mm256_cvtepi8_epi16(vb${N}${N+1});
95 …${M}x${N}${N+1} = _mm256_add_epi32(vacc${M}x${N}${N+1}, _mm256_madd_epi16(vxa${M}, vxb${N}${N+1}));