Home
last modified time | relevance | path

Searched refs:_mm_extract_epi16 (Results 1 – 25 of 296) sorted by relevance

12345678910>>...12

/external/libaom/libaom/av1/common/x86/
Dwarp_plane_sse2.c45 _mm_set_epi32(error_measure_lut[_mm_extract_epi16(diff_1, 3)], in av1_calc_frame_error_sse2()
46 error_measure_lut[_mm_extract_epi16(diff_1, 2)], in av1_calc_frame_error_sse2()
47 error_measure_lut[_mm_extract_epi16(diff_1, 1)], in av1_calc_frame_error_sse2()
48 error_measure_lut[_mm_extract_epi16(diff_1, 0)]); in av1_calc_frame_error_sse2()
50 _mm_set_epi32(error_measure_lut[_mm_extract_epi16(diff_1, 7)], in av1_calc_frame_error_sse2()
51 error_measure_lut[_mm_extract_epi16(diff_1, 6)], in av1_calc_frame_error_sse2()
52 error_measure_lut[_mm_extract_epi16(diff_1, 5)], in av1_calc_frame_error_sse2()
53 error_measure_lut[_mm_extract_epi16(diff_1, 4)]); in av1_calc_frame_error_sse2()
55 _mm_set_epi32(error_measure_lut[_mm_extract_epi16(diff_2, 3)], in av1_calc_frame_error_sse2()
56 error_measure_lut[_mm_extract_epi16(diff_2, 2)], in av1_calc_frame_error_sse2()
[all …]
/external/libavc/encoder/x86/
Dih264e_intra_modes_eval_ssse3.c237 sad_horz = _mm_extract_epi16(sad_8x16b, 0); in ih264e_evaluate_intra16x16_modes_ssse3()
288 sad_vert = _mm_extract_epi16(sad_8x16b, 0); in ih264e_evaluate_intra16x16_modes_ssse3()
299 dcval += _mm_extract_epi16(sad1_8x16b, 0); in ih264e_evaluate_intra16x16_modes_ssse3()
300 dcval += _mm_extract_epi16(sad1_8x16b, 4); in ih264e_evaluate_intra16x16_modes_ssse3()
308 dcval += _mm_extract_epi16(sad1_8x16b, 0); in ih264e_evaluate_intra16x16_modes_ssse3()
309 dcval += _mm_extract_epi16(sad1_8x16b, 4); in ih264e_evaluate_intra16x16_modes_ssse3()
361 sad_dc = _mm_extract_epi16(sad_8x16b, 0); in ih264e_evaluate_intra16x16_modes_ssse3()
532 sad[VERT_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); in ih264e_evaluate_intra_4x4_modes_ssse3()
543 sad[HORZ_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4); in ih264e_evaluate_intra_4x4_modes_ssse3()
572 dcval += _mm_extract_epi16(temp_8x16b, 4); in ih264e_evaluate_intra_4x4_modes_ssse3()
[all …]
/external/libmpeg2/common/x86/
Dideint_cac_ssse3.c190 diff_sum = _mm_extract_epi16(diff, 0); in ideint_cac_8x8_ssse3()
191 diff_sum += _mm_extract_epi16(diff, 1); in ideint_cac_8x8_ssse3()
192 diff_sum += _mm_extract_epi16(diff, 2); in ideint_cac_8x8_ssse3()
193 diff_sum += _mm_extract_epi16(diff, 3); in ideint_cac_8x8_ssse3()
197 diff_sum = _mm_extract_epi16(diff, 4); in ideint_cac_8x8_ssse3()
198 diff_sum += _mm_extract_epi16(diff, 5); in ideint_cac_8x8_ssse3()
199 diff_sum += _mm_extract_epi16(diff, 6); in ideint_cac_8x8_ssse3()
200 diff_sum += _mm_extract_epi16(diff, 7); in ideint_cac_8x8_ssse3()
/external/XNNPACK/src/f32-sigmoid/gen/
Dsse41-lut64-p2-div-x24.c120 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
121 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
122 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
129 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
130 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
131 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
138 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
139 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
140 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
147 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
[all …]
Dsse41-lut64-p2-div-x16.c96 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
97 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
98 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
105 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
106 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
107 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
114 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
115 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
116 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
123 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
[all …]
Dsse41-lut64-p2-div-x20.c108 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
109 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
110 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
117 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
118 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
119 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
126 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
127 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
128 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
135 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
[all …]
Dsse41-lut64-p2-div-x12.c84 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
85 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
86 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
93 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
94 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
95 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
102 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
103 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
104 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
176 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
[all …]
Dsse2-lut64-p2-div-x20.c118 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
119 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
120 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
129 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
130 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
131 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
140 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
141 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
142 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
151 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
[all …]
Dsse41-lut64-p2-div-x8.c72 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
73 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
74 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
81 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
82 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
83 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
144 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
145 …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2))), … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
146 …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6))), … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
186 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
[all …]
Dsse2-lut64-p2-div-x24.c132 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
133 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
134 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
143 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
144 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
145 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
154 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
155 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
156 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
165 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
[all …]
Dsse2-lut64-p2-div-x12.c90 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
91 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
92 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
101 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
102 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
103 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
112 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
113 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
114 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
194 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
[all …]
Dsse2-lut64-p2-div-x16.c104 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
105 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
106 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
115 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
116 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
117 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
126 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
127 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
128 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
137 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
[all …]
Dsse2-lut64-p2-div-x8.c76 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
77 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
78 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
87 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
88 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
89 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
157 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
158 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2)))); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
160 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6)))); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
204 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
[all …]
Dsse41-lut64-p2-div-x4.c57 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
58 …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2))), … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
59 …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6))), … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
99 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
100 …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2))), … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
101 …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6))), … in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-sse41-rr2-lut16-p3-x20.c114 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
115 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
116 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
123 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
124 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
125 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
132 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
133 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
134 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
141 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
[all …]
Dvelu-sse41-rr2-lut16-p3-x24.c126 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
127 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
128 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
135 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
136 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
137 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
144 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
145 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
146 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
153 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
[all …]
Dvelu-sse41-rr2-lut16-p3-x16.c102 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
103 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
104 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
111 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
112 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
113 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
120 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
121 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
122 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
129 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
[all …]
Dvelu-sse41-rr2-lut16-p3-x12.c90 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
91 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
92 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
99 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
100 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
101 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
108 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
109 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
110 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
188 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
[all …]
Dvelu-sse41-rr2-lut16-p3-x8.c78 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
79 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
80 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
87 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
88 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
89 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
154 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
155 …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 2))), … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
156 …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 6))), … in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
197 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
[all …]
Dvelu-sse2-rr2-lut16-p3-x12.c96 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
97 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
98 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
107 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
108 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
109 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
118 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
119 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
120 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
205 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
[all …]
Dvelu-sse2-rr2-lut16-p3-x16.c110 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
111 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
112 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
121 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
122 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
123 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
132 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
133 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
134 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
143 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
[all …]
Dvelu-sse2-rr2-lut16-p3-x20.c124 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
125 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
126 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
135 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
136 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
137 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
146 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
147 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
148 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
157 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
[all …]
Dvelu-sse2-rr2-lut16-p3-x24.c138 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
139 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
140 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
149 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
150 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
151 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
160 const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
161 const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
162 const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
171 const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
[all …]
Dvelu-sse2-rr2-lut16-p3-x8.c82 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
83 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
84 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
93 const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
94 const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
95 const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
166 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
167 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 2)))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
169 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 6)))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
214 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4)))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
[all …]
/external/libvpx/libvpx/vp9/encoder/x86/
Dvp9_diamond_search_sad_avx.c201 const int32_t row0 = _mm_extract_epi16(v_diff_mv_w, 0); in vp9_diamond_search_sad_avx()
202 const int32_t col0 = _mm_extract_epi16(v_diff_mv_w, 1); in vp9_diamond_search_sad_avx()
203 const int32_t row1 = _mm_extract_epi16(v_diff_mv_w, 2); in vp9_diamond_search_sad_avx()
204 const int32_t col1 = _mm_extract_epi16(v_diff_mv_w, 3); in vp9_diamond_search_sad_avx()
205 const int32_t row2 = _mm_extract_epi16(v_diff_mv_w, 4); in vp9_diamond_search_sad_avx()
206 const int32_t col2 = _mm_extract_epi16(v_diff_mv_w, 5); in vp9_diamond_search_sad_avx()
207 const int32_t row3 = _mm_extract_epi16(v_diff_mv_w, 6); in vp9_diamond_search_sad_avx()
208 const int32_t col3 = _mm_extract_epi16(v_diff_mv_w, 7); in vp9_diamond_search_sad_avx()
253 uint32_t local_best_sad = _mm_extract_epi16(v_minp_w, 0); in vp9_diamond_search_sad_avx()
254 uint32_t local_best_idx = _mm_extract_epi16(v_minp_w, 1); in vp9_diamond_search_sad_avx()

12345678910>>...12