• Home
  • Raw
  • Download

Lines Matching refs:ABC

13 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
48 const __m128i va${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) input_a));
50 …const __m128i va${ABC[N:N+8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) (input_a + $…
52 __m128i va${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) input_a);
54 __m128i va${ABC[N:N+8]} = _mm_loadl_epi64((const __m128i*) (input_a + ${N}));
61 va${ABC[N:N+8]} = _mm_unpacklo_epi8(va${ABC[N:N+8]}, vzero);
64 va${ABC[N:N+8]} = _mm_srai_epi16(_mm_unpacklo_epi8(va${ABC[N:N+8]}, va${ABC[N:N+8]}), 8);
67 const __m128i vxa${ABC[N:N+8]} = _mm_sub_epi16(va${ABC[N:N+8]}, va_zero_point);
70 const __m128i vprod${ABC[N:N+8]}lo = _mm_mullo_epi16(vxa${ABC[N:N+8]}, vxb);
71 const __m128i vprod${ABC[N:N+8]}hi = _mm_mulhi_epi16(vxa${ABC[N:N+8]}, vxb);
74 … const __m128i vprod${ABC[N:N+4]} = _mm_unpacklo_epi16(vprod${ABC[N:N+8]}lo, vprod${ABC[N:N+8]}hi);
75 …const __m128i vprod${ABC[N+4:N+8]} = _mm_unpackhi_epi16(vprod${ABC[N:N+8]}lo, vprod${ABC[N:N+8]}hi…
78 __m128 vfpacc${ABC[N:N+4]} = _mm_cvtepi32_ps(vprod${ABC[N:N+4]});
81 vfpacc${ABC[N:N+4]} = _mm_mul_ps(vfpacc${ABC[N:N+4]}, vscale);
84 const __m128i vacc${ABC[N:N+4]} = _mm_cvtps_epi32(vfpacc${ABC[N:N+4]});
87 …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[N:N+4]}, vacc${ABC[N+4:N+8]})…
91 vout${ABC[N:N+8]} = _mm_max_epi16(vout${ABC[N:N+8]}, voutput_min);
94 vout${ABC[N:N+8]} = _mm_min_epi16(vout${ABC[N:N+8]}, voutput_max);
98 __m128i vout${ABC[N:N+16]} = ${_MM_PACKXS_EPI16}(vout${ABC[N:N+8]}, vout${ABC[N+8:N+16]});
100 …__m128i vout${ABC[N:N+8]}${ABC[N:N+8]} = ${_MM_PACKXS_EPI16}(vout${ABC[N:N+8]}, vout${ABC[N:N+8]});
105 vout${ABC[N:N+16]} = ${_MM_MAX_EPX8}(vout${ABC[N:N+16]}, voutput_min);
107 … vout${ABC[N:N+8]}${ABC[N:N+8]} = ${_MM_MAX_EPX8}(vout${ABC[N:N+8]}${ABC[N:N+8]}, voutput_min);
111 vout${ABC[N:N+16]} = ${_MM_MIN_EPX8}(vout${ABC[N:N+16]}, voutput_max);
113 … vout${ABC[N:N+8]}${ABC[N:N+8]} = ${_MM_MIN_EPX8}(vout${ABC[N:N+8]}${ABC[N:N+8]}, voutput_max);
116 _mm_storeu_si128((__m128i*) output, vout${ABC[0:16]});
118 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
121 _mm_storeu_si128((__m128i*) (output + ${N}), vout${ABC[N:N+16]});
123 _mm_storel_epi64((__m128i*) (output + ${N}), vout${ABC[N:N+8]}${ABC[N:N+8]});
129 … const __m128i va${ABC[0:8]} = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) input_a));
131 __m128i va${ABC[0:8]} = _mm_loadl_epi64((const __m128i*) input_a);
138 va${ABC[0:8]} = _mm_unpacklo_epi8(va${ABC[0:8]}, vzero);
140 va${ABC[0:8]} = _mm_srai_epi16(_mm_unpacklo_epi8(va${ABC[0:8]}, va${ABC[0:8]}), 8);
142 const __m128i vxa${ABC[0:8]} = _mm_sub_epi16(va${ABC[0:8]}, va_zero_point);
144 const __m128i vprod${ABC[0:8]}lo = _mm_mullo_epi16(vxa${ABC[0:8]}, vxb);
145 const __m128i vprod${ABC[0:8]}hi = _mm_mulhi_epi16(vxa${ABC[0:8]}, vxb);
147 const __m128i vprod${ABC[0:4]} = _mm_unpacklo_epi16(vprod${ABC[0:8]}lo, vprod${ABC[0:8]}hi);
148 const __m128i vprod${ABC[4:8]} = _mm_unpackhi_epi16(vprod${ABC[0:8]}lo, vprod${ABC[0:8]}hi);
150 __m128 vfpacc${ABC[0:4]} = _mm_cvtepi32_ps(vprod${ABC[0:4]});
151 __m128 vfpacc${ABC[4:8]} = _mm_cvtepi32_ps(vprod${ABC[4:8]});
153 vfpacc${ABC[0:4]} = _mm_mul_ps(vfpacc${ABC[0:4]}, vscale);
154 vfpacc${ABC[4:8]} = _mm_mul_ps(vfpacc${ABC[4:8]}, vscale);
156 const __m128i vacc${ABC[0:4]} = _mm_cvtps_epi32(vfpacc${ABC[0:4]});
157 const __m128i vacc${ABC[4:8]} = _mm_cvtps_epi32(vfpacc${ABC[4:8]});
159 …__m128i vout${ABC[0:8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[0:4]}, vacc${ABC[4:8]}), voutpu…
161 vout${ABC[0:8]} = _mm_max_epi16(vout${ABC[0:8]}, voutput_min);
162 vout${ABC[0:8]} = _mm_min_epi16(vout${ABC[0:8]}, voutput_max);
164 __m128i vout${ABC[0:8]}${ABC[0:8]} = ${_MM_PACKXS_EPI16}(vout${ABC[0:8]}, vout${ABC[0:8]});
166 vout${ABC[0:8]}${ABC[0:8]} = ${_MM_MAX_EPX8}(vout${ABC[0:8]}${ABC[0:8]}, voutput_min);
167 vout${ABC[0:8]}${ABC[0:8]} = ${_MM_MIN_EPX8}(vout${ABC[0:8]}${ABC[0:8]}, voutput_max);
171 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
176 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]}));
177 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi64(vout${ABC[0:8]}${ABC[0:8]}, 32);
182 … unaligned_store_u16(output, (uint16_t) _mm_extract_epi16(vout${ABC[0:8]}${ABC[0:8]}, 0));
184 unaligned_store_u16(output, (uint16_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]}));
185 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi32(vout${ABC[0:8]}${ABC[0:8]}, 16);
190 *output = (${XINT8_T}) _mm_extract_epi8(vout${ABC[0:8]}${ABC[0:8]}, 0);
192 *output = (int32_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]});
198 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]}));
199 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi64(vout${ABC[0:8]}${ABC[0:8]}, 32);
204 … unaligned_store_u16(output, (uint16_t) _mm_extract_epi16(vout${ABC[0:8]}${ABC[0:8]}, 0));
206 unaligned_store_u16(output, (uint16_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]}));
207 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi32(vout${ABC[0:8]}${ABC[0:8]}, 16);
212 *output = (${XINT8_T}) _mm_extract_epi8(vout${ABC[0:8]}${ABC[0:8]}, 0);
214 *output = (${XINT8_T}) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]});