/external/libaom/libaom/av1/common/x86/ |
D | warp_plane_sse2.c |
    45  _mm_set_epi32(error_measure_lut[_mm_extract_epi16(diff_1, 3)],  in av1_calc_frame_error_sse2()
    46  error_measure_lut[_mm_extract_epi16(diff_1, 2)],  in av1_calc_frame_error_sse2()
    47  error_measure_lut[_mm_extract_epi16(diff_1, 1)],  in av1_calc_frame_error_sse2()
    48  error_measure_lut[_mm_extract_epi16(diff_1, 0)]);  in av1_calc_frame_error_sse2()
    50  _mm_set_epi32(error_measure_lut[_mm_extract_epi16(diff_1, 7)],  in av1_calc_frame_error_sse2()
    51  error_measure_lut[_mm_extract_epi16(diff_1, 6)],  in av1_calc_frame_error_sse2()
    52  error_measure_lut[_mm_extract_epi16(diff_1, 5)],  in av1_calc_frame_error_sse2()
    53  error_measure_lut[_mm_extract_epi16(diff_1, 4)]);  in av1_calc_frame_error_sse2()
    55  _mm_set_epi32(error_measure_lut[_mm_extract_epi16(diff_2, 3)],  in av1_calc_frame_error_sse2()
    56  error_measure_lut[_mm_extract_epi16(diff_2, 2)],  in av1_calc_frame_error_sse2()
    [all …]
|
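The warp_plane_sse2.c hits above all belong to one idiom: SSE2 has no gather instruction, so av1_calc_frame_error_sse2() pulls each 16-bit difference back out with _mm_extract_epi16, uses it to index error_measure_lut, and repacks the looked-up values with _mm_set_epi32. A minimal sketch of that pattern follows; the placeholder table example_error_lut, its size, and the added accumulation step are assumptions for illustration, not libaom's actual definitions.

    #include <emmintrin.h>
    #include <stdint.h>

    /* Placeholder standing in for libaom's error_measure_lut; size and contents
     * (one entry per 8-bit absolute difference) are illustrative only. */
    static int32_t example_error_lut[256];

    /* Gather one table entry per 16-bit lane of `diff` (each lane assumed < 256)
     * and fold the results into a 32-bit accumulator: extract each lane, look it
     * up, repack with _mm_set_epi32, then add. */
    static __m128i accumulate_error_sse2(__m128i diff, __m128i acc) {
      const __m128i err_lo =
          _mm_set_epi32(example_error_lut[_mm_extract_epi16(diff, 3)],
                        example_error_lut[_mm_extract_epi16(diff, 2)],
                        example_error_lut[_mm_extract_epi16(diff, 1)],
                        example_error_lut[_mm_extract_epi16(diff, 0)]);
      const __m128i err_hi =
          _mm_set_epi32(example_error_lut[_mm_extract_epi16(diff, 7)],
                        example_error_lut[_mm_extract_epi16(diff, 6)],
                        example_error_lut[_mm_extract_epi16(diff, 5)],
                        example_error_lut[_mm_extract_epi16(diff, 4)]);
      return _mm_add_epi32(acc, _mm_add_epi32(err_lo, err_hi));
    }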
/external/libavc/encoder/x86/ |
D | ih264e_intra_modes_eval_ssse3.c |
    237  sad_horz = _mm_extract_epi16(sad_8x16b, 0);  in ih264e_evaluate_intra16x16_modes_ssse3()
    288  sad_vert = _mm_extract_epi16(sad_8x16b, 0);  in ih264e_evaluate_intra16x16_modes_ssse3()
    299  dcval += _mm_extract_epi16(sad1_8x16b, 0);  in ih264e_evaluate_intra16x16_modes_ssse3()
    300  dcval += _mm_extract_epi16(sad1_8x16b, 4);  in ih264e_evaluate_intra16x16_modes_ssse3()
    308  dcval += _mm_extract_epi16(sad1_8x16b, 0);  in ih264e_evaluate_intra16x16_modes_ssse3()
    309  dcval += _mm_extract_epi16(sad1_8x16b, 4);  in ih264e_evaluate_intra16x16_modes_ssse3()
    361  sad_dc = _mm_extract_epi16(sad_8x16b, 0);  in ih264e_evaluate_intra16x16_modes_ssse3()
    532  sad[VERT_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);  in ih264e_evaluate_intra_4x4_modes_ssse3()
    543  sad[HORZ_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);  in ih264e_evaluate_intra_4x4_modes_ssse3()
    572  dcval += _mm_extract_epi16(temp_8x16b, 4);  in ih264e_evaluate_intra_4x4_modes_ssse3()
    [all …]
|
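In ih264e_intra_modes_eval_ssse3.c the hits read back the result of _mm_sad_epu8: that instruction leaves two partial sums of absolute differences in 16-bit lanes 0 and 4, so the scalar SAD is recovered with one or two _mm_extract_epi16 calls. A self-contained sketch of that read-back, using illustrative names (sad16_row, src, pred) rather than the encoder's own:

    #include <emmintrin.h>
    #include <stdint.h>

    /* SAD over one 16-byte row: _mm_sad_epu8 produces two partial sums
     * (bytes 0-7 and bytes 8-15) in 16-bit lanes 0 and 4, which are
     * extracted and added to form the scalar result. */
    static int sad16_row(const uint8_t *src, const uint8_t *pred) {
      const __m128i s = _mm_loadu_si128((const __m128i *) src);
      const __m128i p = _mm_loadu_si128((const __m128i *) pred);
      const __m128i sad_8x16b = _mm_sad_epu8(s, p);
      return _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
    }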
/external/libmpeg2/common/x86/ |
D | ideint_cac_ssse3.c |
    190  diff_sum = _mm_extract_epi16(diff, 0);  in ideint_cac_8x8_ssse3()
    191  diff_sum += _mm_extract_epi16(diff, 1);  in ideint_cac_8x8_ssse3()
    192  diff_sum += _mm_extract_epi16(diff, 2);  in ideint_cac_8x8_ssse3()
    193  diff_sum += _mm_extract_epi16(diff, 3);  in ideint_cac_8x8_ssse3()
    197  diff_sum = _mm_extract_epi16(diff, 4);  in ideint_cac_8x8_ssse3()
    198  diff_sum += _mm_extract_epi16(diff, 5);  in ideint_cac_8x8_ssse3()
    199  diff_sum += _mm_extract_epi16(diff, 6);  in ideint_cac_8x8_ssse3()
    200  diff_sum += _mm_extract_epi16(diff, 7);  in ideint_cac_8x8_ssse3()
|
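The ideint_cac_8x8_ssse3() hits are a plain horizontal reduction: each 16-bit lane of diff is extracted and accumulated into a scalar. The original keeps two four-lane partial sums (lines 190-193 and 197-200); the sketch below, with assumed types, collapses them into a single eight-lane sum.

    #include <emmintrin.h>
    #include <stdint.h>

    /* Horizontal sum of eight unsigned 16-bit lanes, one _mm_extract_epi16 per
     * lane; _mm_extract_epi16 zero-extends, so the lanes add as unsigned values. */
    static int32_t hsum_epu16(__m128i diff) {
      int32_t sum = _mm_extract_epi16(diff, 0);
      sum += _mm_extract_epi16(diff, 1);
      sum += _mm_extract_epi16(diff, 2);
      sum += _mm_extract_epi16(diff, 3);
      sum += _mm_extract_epi16(diff, 4);
      sum += _mm_extract_epi16(diff, 5);
      sum += _mm_extract_epi16(diff, 6);
      sum += _mm_extract_epi16(diff, 7);
      return sum;
    }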
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-lut64-p2-div-x24.c |
    120  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    121  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    122  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    129  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    130  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    131  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    138  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    139  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    140  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    147  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    [all …]
|
D | sse41-lut64-p2-div-x16.c |
    96  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    97  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    98  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    105  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    106  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    107  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    114  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    115  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    116  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    123  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
    [all …]
|
D | sse41-lut64-p2-div-x20.c |
    108  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    109  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    110  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    117  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    118  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    119  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    126  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    127  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    128  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    135  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
    [all …]
|
D | sse41-lut64-p2-div-x12.c |
    84  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    85  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    86  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    93  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    94  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    95  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    102  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    103  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    104  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    176  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
    [all …]
|
D | sse2-lut64-p2-div-x20.c |
    118  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    119  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    120  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    129  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    130  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    131  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    140  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    141  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    142  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    151  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
    [all …]
|
D | sse41-lut64-p2-div-x8.c |
    72  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    73  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    74  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    81  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    82  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    83  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    144  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    145  …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2))), …  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    146  …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6))), …  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    186  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
    [all …]
|
D | sse2-lut64-p2-div-x24.c |
    132  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    133  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    134  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    143  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    144  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    145  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    154  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    155  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    156  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    165  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    [all …]
|
D | sse2-lut64-p2-div-x12.c |
    90  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    91  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    92  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    101  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    102  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    103  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    112  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    113  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    114  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    194  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
    [all …]
|
D | sse2-lut64-p2-div-x16.c |
    104  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    105  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    106  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    115  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    116  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    117  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    126  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    127  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    128  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    137  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
    [all …]
|
D | sse2-lut64-p2-div-x8.c |
    76  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    77  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    78  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    87  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    88  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    89  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    157  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    158  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2))));  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    160  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6))));  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    204  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
    [all …]
|
D | sse41-lut64-p2-div-x4.c |
    57  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
    58  …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2))), …  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
    59  …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6))), …  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
    99  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
    100  …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2))), …  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
    101  …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6))), …  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
|
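The XNNPACK sigmoid kernels above (and the f32-velu kernels in the next section) use _mm_extract_epi16 as a cheap stand-in for a 32-bit lane extract: the values in vidx0123, vidx4567, etc. are byte offsets into the small exp2 lookup tables, so the upper half of every 32-bit lane is zero and reading the even 16-bit positions 2, 4, and 6 recovers lanes 1-3 exactly, without needing SSE4.1's _mm_extract_epi32. A sketch of that extraction; taking lane 0 with _mm_cvtsi128_si32 is an assumption about the lines not shown in the results above.

    #include <emmintrin.h>
    #include <stdint.h>

    /* Read four 32-bit LUT offsets out of an SSE register.  Valid only because
     * each offset fits in 16 bits, so the even 16-bit lanes already hold the
     * full 32-bit values. */
    static void extract_lut_offsets(__m128i vidx0123, uint32_t out[4]) {
      out[0] = (uint32_t) _mm_cvtsi128_si32(vidx0123);     /* 32-bit lane 0 */
      out[1] = (uint32_t) _mm_extract_epi16(vidx0123, 2);  /* 32-bit lane 1 */
      out[2] = (uint32_t) _mm_extract_epi16(vidx0123, 4);  /* 32-bit lane 2 */
      out[3] = (uint32_t) _mm_extract_epi16(vidx0123, 6);  /* 32-bit lane 3 */
    }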
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-sse41-rr2-lut16-p3-x20.c |
    114  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    115  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    116  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    123  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    124  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    125  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    132  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    133  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    134  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    141  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
    [all …]
|
D | velu-sse41-rr2-lut16-p3-x24.c |
    126  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    127  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    128  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    135  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    136  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    137  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    144  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    145  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    146  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    153  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    [all …]
|
D | velu-sse41-rr2-lut16-p3-x16.c |
    102  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    103  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    104  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    111  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    112  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    113  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    120  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    121  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    122  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    129  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
    [all …]
|
D | velu-sse41-rr2-lut16-p3-x12.c |
    90  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    91  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    92  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    99  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    100  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    101  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    108  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    109  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    110  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    188  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
    [all …]
|
D | velu-sse41-rr2-lut16-p3-x8.c |
    78  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    79  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    80  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    87  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    88  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    89  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    154  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    155  …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 2))), …  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    156  …const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 6))), …  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    197  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
    [all …]
|
D | velu-sse2-rr2-lut16-p3-x12.c |
    96  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    97  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    98  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    107  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    108  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    109  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    118  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    119  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    120  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    205  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
    [all …]
|
D | velu-sse2-rr2-lut16-p3-x16.c |
    110  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    111  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    112  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    121  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    122  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    123  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    132  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    133  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    134  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    143  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
    [all …]
|
D | velu-sse2-rr2-lut16-p3-x20.c |
    124  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    125  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    126  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    135  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    136  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    137  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    146  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    147  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    148  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    157  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
    [all …]
|
D | velu-sse2-rr2-lut16-p3-x24.c |
    138  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    139  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    140  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    149  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    150  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    151  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    160  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    161  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    162  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    171  const uint32_t vidxD = (uint32_t) _mm_extract_epi16(vidxCDEF, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    [all …]
|
D | velu-sse2-rr2-lut16-p3-x8.c |
    82  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    83  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    84  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    93  const uint32_t vidx5 = (uint32_t) _mm_extract_epi16(vidx4567, 2);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    94  const uint32_t vidx6 = (uint32_t) _mm_extract_epi16(vidx4567, 4);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    95  const uint32_t vidx7 = (uint32_t) _mm_extract_epi16(vidx4567, 6);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    166  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    167  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 2))));  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    169  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 6))));  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    214  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4))));  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
    [all …]
|
/external/libvpx/libvpx/vp9/encoder/x86/ |
D | vp9_diamond_search_sad_avx.c |
    201  const int32_t row0 = _mm_extract_epi16(v_diff_mv_w, 0);  in vp9_diamond_search_sad_avx()
    202  const int32_t col0 = _mm_extract_epi16(v_diff_mv_w, 1);  in vp9_diamond_search_sad_avx()
    203  const int32_t row1 = _mm_extract_epi16(v_diff_mv_w, 2);  in vp9_diamond_search_sad_avx()
    204  const int32_t col1 = _mm_extract_epi16(v_diff_mv_w, 3);  in vp9_diamond_search_sad_avx()
    205  const int32_t row2 = _mm_extract_epi16(v_diff_mv_w, 4);  in vp9_diamond_search_sad_avx()
    206  const int32_t col2 = _mm_extract_epi16(v_diff_mv_w, 5);  in vp9_diamond_search_sad_avx()
    207  const int32_t row3 = _mm_extract_epi16(v_diff_mv_w, 6);  in vp9_diamond_search_sad_avx()
    208  const int32_t col3 = _mm_extract_epi16(v_diff_mv_w, 7);  in vp9_diamond_search_sad_avx()
    253  uint32_t local_best_sad = _mm_extract_epi16(v_minp_w, 0);  in vp9_diamond_search_sad_avx()
    254  uint32_t local_best_idx = _mm_extract_epi16(v_minp_w, 1);  in vp9_diamond_search_sad_avx()
|
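vp9_diamond_search_sad_avx() keeps four motion-vector differences interleaved as eight 16-bit (row, col) lanes, and packs the winning (SAD, index) pair the same way in v_minp_w, reading each field back with _mm_extract_epi16. A sketch of that unpacking with an illustrative mv_pair struct rather than VP9's own types; note that _mm_extract_epi16 zero-extends, so a genuinely signed field needs an explicit int16_t cast before it is widened.

    #include <emmintrin.h>
    #include <stdint.h>

    /* Illustrative container for one interleaved (row, col) pair. */
    typedef struct { int16_t row, col; } mv_pair;

    /* Unpack four (row, col) pairs stored as eight 16-bit lanes.  The index
     * argument of _mm_extract_epi16 must be a compile-time constant, so the
     * lanes are read out explicitly rather than in a loop. */
    static void unpack_mv_pairs(__m128i v_diff_mv_w, mv_pair out[4]) {
      out[0].row = (int16_t) _mm_extract_epi16(v_diff_mv_w, 0);
      out[0].col = (int16_t) _mm_extract_epi16(v_diff_mv_w, 1);
      out[1].row = (int16_t) _mm_extract_epi16(v_diff_mv_w, 2);
      out[1].col = (int16_t) _mm_extract_epi16(v_diff_mv_w, 3);
      out[2].row = (int16_t) _mm_extract_epi16(v_diff_mv_w, 4);
      out[2].col = (int16_t) _mm_extract_epi16(v_diff_mv_w, 5);
      out[3].row = (int16_t) _mm_extract_epi16(v_diff_mv_w, 6);
      out[3].col = (int16_t) _mm_extract_epi16(v_diff_mv_w, 7);
    }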