Lines Matching refs:ABC
11 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
52 …const __m128i va${ABC[0:4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(inpu…
53 …const __m128i vb${ABC[0:4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(inpu…
55 …const __m128i va${ABC[N:N+4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(in…
56 …const __m128i vb${ABC[N:N+4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(in…
62 __m128i vacc${ABC[N:N+4]} = _mm_macc_epi32(va${ABC[N:N+4]}, va_multiplier, vbias);
65 vacc${ABC[N:N+4]} = _mm_macc_epi32(vb${ABC[N:N+4]}, vb_multiplier, vacc${ABC[N:N+4]});
68 … __m128i vacc${ABC[N:N+4]} = _mm_add_epi32(vbias, _mm_mullo_epi32(va${ABC[N:N+4]}, va_multiplier));
71 …vacc${ABC[N:N+4]} = _mm_add_epi32(vacc${ABC[N:N+4]}, _mm_mullo_epi32(vb${ABC[N:N+4]}, vb_multiplie…
74 vacc${ABC[N:N+4]} = _mm_sra_epi32(vacc${ABC[N:N+4]}, vshift);
77 …const __m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[N:N+4]}, vacc${ABC[N+4:…
81 __m128i vout${ABC[N:N+16]} = ${_MM_PACKXS_EPI16}(vout${ABC[N:N+8]}, vout${ABC[N+8:N+16]});
83 …__m128i vout${ABC[N:N+8]}${ABC[N:N+8]} = ${_MM_PACKXS_EPI16}(vout${ABC[N:N+8]}, vout${ABC[N:N+8]});
87 vout${ABC[N:N+16]} = ${_MM_MAX_EPX8}(vout${ABC[N:N+16]}, voutput_min);
89 … vout${ABC[N:N+8]}${ABC[N:N+8]} = ${_MM_MAX_EPX8}(vout${ABC[N:N+8]}${ABC[N:N+8]}, voutput_min);
93 vout${ABC[N:N+16]} = ${_MM_MIN_EPX8}(vout${ABC[N:N+16]}, voutput_max);
95 … vout${ABC[N:N+8]}${ABC[N:N+8]} = ${_MM_MIN_EPX8}(vout${ABC[N:N+8]}${ABC[N:N+8]}, voutput_max);
98 _mm_storeu_si128((__m128i*) output, vout${ABC[0:16]});
100 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
103 _mm_storeu_si128((__m128i*) (output + ${N}), vout${ABC[N:N+16]});
105 _mm_storel_epi64((__m128i*) (output + ${N}), vout${ABC[N:N+8]}${ABC[N:N+8]});
110 …const __m128i va${ABC[0:4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(inpu…
111 …const __m128i vb${ABC[0:4]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(inpu…
112 …const __m128i va${ABC[4:8]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(inpu…
113 …const __m128i vb${ABC[4:8]} = ${_MM_CVTEPX8_EPI32}(_mm_cvtsi32_si128((int) unaligned_load_s32(inpu…
119 __m128i vacc${ABC[0:4]} = _mm_macc_epi32(va${ABC[0:4]}, va_multiplier, vbias);
120 __m128i vacc${ABC[4:8]} = _mm_macc_epi32(va${ABC[4:8]}, va_multiplier, vbias);
122 vacc${ABC[0:4]} = _mm_macc_epi32(vb${ABC[0:4]}, vb_multiplier, vacc${ABC[0:4]});
123 vacc${ABC[4:8]} = _mm_macc_epi32(vb${ABC[4:8]}, vb_multiplier, vacc${ABC[4:8]});
125 … __m128i vacc${ABC[0:4]} = _mm_add_epi32(vbias, _mm_mullo_epi32(va${ABC[0:4]}, va_multiplier));
126 … __m128i vacc${ABC[4:8]} = _mm_add_epi32(vbias, _mm_mullo_epi32(va${ABC[4:8]}, va_multiplier));
128 … vacc${ABC[0:4]} = _mm_add_epi32(vacc${ABC[0:4]}, _mm_mullo_epi32(vb${ABC[0:4]}, vb_multiplier));
129 … vacc${ABC[4:8]} = _mm_add_epi32(vacc${ABC[4:8]}, _mm_mullo_epi32(vb${ABC[4:8]}, vb_multiplier));
131 vacc${ABC[0:4]} = _mm_sra_epi32(vacc${ABC[0:4]}, vshift);
132 vacc${ABC[4:8]} = _mm_sra_epi32(vacc${ABC[4:8]}, vshift);
134 …const __m128i vout${ABC[0:8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[0:4]}, vacc${ABC[4:8]}), …
136 __m128i vout${ABC[0:8]}${ABC[0:8]} = ${_MM_PACKXS_EPI16}(vout${ABC[0:8]}, vout${ABC[0:8]});
137 vout${ABC[0:8]}${ABC[0:8]} = ${_MM_MAX_EPX8}(vout${ABC[0:8]}${ABC[0:8]}, voutput_min);
138 vout${ABC[0:8]}${ABC[0:8]} = ${_MM_MIN_EPX8}(vout${ABC[0:8]}${ABC[0:8]}, voutput_max);
142 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
147 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]}));
148 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi64(vout${ABC[0:8]}${ABC[0:8]}, 32);
152 … unaligned_store_u16(output, (uint16_t) _mm_extract_epi16(vout${ABC[0:8]}${ABC[0:8]}, 0));
153 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi32(vout${ABC[0:8]}${ABC[0:8]}, 16);
157 *output = (${XINT8_T}) _mm_extract_epi8(vout${ABC[0:8]}${ABC[0:8]}, 0);
163 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vout${ABC[0:8]}${ABC[0:8]}));
164 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi64(vout${ABC[0:8]}${ABC[0:8]}, 32);
168 unaligned_store_u16(output, (uint16_t) _mm_extract_epi16(vout${ABC[0:8]}${ABC[0:8]}, 0));
169 vout${ABC[0:8]}${ABC[0:8]} = _mm_srli_epi32(vout${ABC[0:8]}${ABC[0:8]}, 16);
173 *output = (${XINT8_T}) _mm_extract_epi8(vout${ABC[0:8]}${ABC[0:8]}, 0);