Lines Matching refs:vec_t
71 typedef __m128i vec_t; typedef
77 static inline vec_t vec_add(vec_t a, vec_t b) { return _mm_add_epi16(a, b); } in vec_add()
80 static inline vec_t vec_sub(vec_t a, vec_t b) { return _mm_sub_epi16(a, b); } in vec_sub()
84 static inline vec_t vec_mul(vec_t a, uint16_t b) { in vec_mul()
90 static inline vec_t vec_fma(vec_t a, vec_t b, uint16_t c) { in vec_fma()
95 static inline void vec3_rshift_word(vec_t v[3]) { in vec3_rshift_word()
112 static inline void vec4_rshift_word(vec_t v[4]) { in vec4_rshift_word()
134 static inline vec_t vec_merge_3_5(vec_t left, vec_t right) { in vec_merge_3_5()
140 static inline void poly3_vec_lshift1(vec_t a_s[6], vec_t a_a[6]) { in poly3_vec_lshift1()
141 vec_t carry_s = {0}; in poly3_vec_lshift1()
142 vec_t carry_a = {0}; in poly3_vec_lshift1()
145 vec_t next_carry_s = _mm_srli_epi64(a_s[i], 63); in poly3_vec_lshift1()
151 vec_t next_carry_a = _mm_srli_epi64(a_a[i], 63); in poly3_vec_lshift1()
161 static inline void poly3_vec_rshift1(vec_t a_s[6], vec_t a_a[6]) { in poly3_vec_rshift1()
162 vec_t carry_s = {0}; in poly3_vec_rshift1()
163 vec_t carry_a = {0}; in poly3_vec_rshift1()
166 const vec_t next_carry_s = _mm_slli_epi64(a_s[i], 63); in poly3_vec_rshift1()
172 const vec_t next_carry_a = _mm_slli_epi64(a_a[i], 63); in poly3_vec_rshift1()
182 static inline vec_t vec_broadcast_bit(vec_t a) { in vec_broadcast_bit()
195 typedef uint16x8_t vec_t; typedef
202 static inline vec_t vec_add(vec_t a, vec_t b) { return a + b; } in vec_add()
204 static inline vec_t vec_sub(vec_t a, vec_t b) { return a - b; } in vec_sub()
206 static inline vec_t vec_mul(vec_t a, uint16_t b) { return vmulq_n_u16(a, b); } in vec_mul()
208 static inline vec_t vec_fma(vec_t a, vec_t b, uint16_t c) { in vec_fma()
212 static inline void vec3_rshift_word(vec_t v[3]) { in vec3_rshift_word()
219 static inline void vec4_rshift_word(vec_t v[4]) { in vec4_rshift_word()
227 static inline vec_t vec_merge_3_5(vec_t left, vec_t right) { in vec_merge_3_5()
231 static inline uint16_t vec_get_word(vec_t v, unsigned i) { in vec_get_word()
237 static inline vec_t vec_broadcast_bit(vec_t a) { in vec_broadcast_bit()
238 a = (vec_t)vshrq_n_s16(((int16x8_t)a) << 15, 15); in vec_broadcast_bit()
242 static inline void poly3_vec_lshift1(vec_t a_s[6], vec_t a_a[6]) { in poly3_vec_lshift1()
243 vec_t carry_s = {0}; in poly3_vec_lshift1()
244 vec_t carry_a = {0}; in poly3_vec_lshift1()
245 const vec_t kZero = {0}; in poly3_vec_lshift1()
248 vec_t next_carry_s = a_s[i] >> 15; in poly3_vec_lshift1()
254 vec_t next_carry_a = a_a[i] >> 15; in poly3_vec_lshift1()
262 static inline void poly3_vec_rshift1(vec_t a_s[6], vec_t a_a[6]) { in poly3_vec_rshift1()
263 vec_t carry_s = {0}; in poly3_vec_rshift1()
264 vec_t carry_a = {0}; in poly3_vec_rshift1()
265 const vec_t kZero = {0}; in poly3_vec_rshift1()
268 vec_t next_carry_s = a_s[i] << 15; in poly3_vec_rshift1()
274 vec_t next_carry_a = a_a[i] << 15; in poly3_vec_rshift1()
744 static inline void poly3_vec_cswap(vec_t a_s[6], vec_t a_a[6], vec_t b_s[6], in poly3_vec_cswap()
745 vec_t b_a[6], const vec_t swap) { in poly3_vec_cswap()
747 const vec_t sum_s = swap & (a_s[i] ^ b_s[i]); in poly3_vec_cswap()
751 const vec_t sum_a = swap & (a_a[i] ^ b_a[i]); in poly3_vec_cswap()
758 static inline void poly3_vec_fmsub(vec_t a_s[6], vec_t a_a[6], vec_t b_s[6], in poly3_vec_fmsub()
759 vec_t b_a[6], const vec_t ms, in poly3_vec_fmsub()
760 const vec_t ma) { in poly3_vec_fmsub()
763 const vec_t s = b_s[i]; in poly3_vec_fmsub()
764 const vec_t a = b_a[i]; in poly3_vec_fmsub()
765 const vec_t product_a = a & ma; in poly3_vec_fmsub()
766 const vec_t product_s = (s ^ ms) & product_a; in poly3_vec_fmsub()
768 const vec_t out_s = a_s[i]; in poly3_vec_fmsub()
769 const vec_t out_a = a_a[i]; in poly3_vec_fmsub()
770 const vec_t t = out_a ^ product_a; in poly3_vec_fmsub()
780 const vec_t kZero = {0}; in poly3_invert_vec()
781 const vec_t kOne = {1}; in poly3_invert_vec()
782 static const uint8_t kBottomSixtyOne[sizeof(vec_t)] = { in poly3_invert_vec()
785 vec_t v_s[6], v_a[6], r_s[6], r_a[6], f_s[6], f_a[6], g_s[6], g_a[6]; in poly3_invert_vec()
795 memset(f_a, 0xff, 5 * sizeof(vec_t)); in poly3_invert_vec()
813 const vec_t g_has_constant_term = vec_broadcast_bit(g_a[0]); in poly3_invert_vec()
814 const vec_t mask_w = in poly3_invert_vec()
816 const vec_t mask = vec_broadcast_bit(mask_w) & g_has_constant_term; in poly3_invert_vec()
818 const vec_t c_a = vec_broadcast_bit(f_a[0] & g_a[0]); in poly3_invert_vec()
819 const vec_t c_s = vec_broadcast_bit((f_s[0] ^ g_s[0]) & c_a); in poly3_invert_vec()
907 #define COEFFICIENTS_PER_VEC (sizeof(vec_t) / sizeof(uint16_t))
922 vec_t vectors[VECS_PER_POLY];
949 static void poly_mul_vec_aux(vec_t *restrict out, vec_t *restrict scratch, in poly_mul_vec_aux()
950 const vec_t *restrict a, const vec_t *restrict b, in poly_mul_vec_aux()
987 vec_t result[4]; in poly_mul_vec_aux()
988 vec_t vec_a[3]; in poly_mul_vec_aux()
989 static const vec_t kZero = {0}; in poly_mul_vec_aux()
1053 vec_t result[6]; in poly_mul_vec_aux()
1054 vec_t vec_a[4]; in poly_mul_vec_aux()
1055 static const vec_t kZero = {0}; in poly_mul_vec_aux()
1148 const vec_t *a_high = &a[low_len]; in poly_mul_vec_aux()
1149 const vec_t *b_high = &b[low_len]; in poly_mul_vec_aux()
1162 vec_t *const child_scratch = &scratch[2 * high_len]; in poly_mul_vec_aux()
1192 OPENSSL_STATIC_ASSERT(sizeof(out->v) == sizeof(vec_t) * VECS_PER_POLY, in poly_mul_vec()
1194 OPENSSL_STATIC_ASSERT(alignof(struct poly) == alignof(vec_t), in poly_mul_vec()
1197 vec_t prod[VECS_PER_POLY * 2]; in poly_mul_vec()
1198 vec_t scratch[172]; in poly_mul_vec()
1205 vec_t *out_vecs = (vec_t *)out->v; in poly_mul_vec()
1208 const vec_t prev = prod[VECS_PER_POLY - 1 + i]; in poly_mul_vec()
1209 const vec_t this = prod[VECS_PER_POLY + i]; in poly_mul_vec()