/external/libvpx/libvpx/vpx_dsp/x86/

D | highbd_intrapred_intrin_ssse3.c
     59  *row = _mm_alignr_epi8(*ar, *row, 2);  in d45_store_8()
     70  const __m128i BCDEFGHH = _mm_alignr_epi8(HHHHHHHH, ABCDEFGH, 2);  in vpx_highbd_d45_predictor_8x8_ssse3()
     71  const __m128i CDEFGHHH = _mm_alignr_epi8(HHHHHHHH, ABCDEFGH, 4);  in vpx_highbd_d45_predictor_8x8_ssse3()
     89  *row_0 = _mm_alignr_epi8(*row_1, *row_0, 2);  in d45_store_16()
     90  *row_1 = _mm_alignr_epi8(*ar, *row_1, 2);  in d45_store_16()
    103  const __m128i B0 = _mm_alignr_epi8(A1, A0, 2);  in vpx_highbd_d45_predictor_16x16_ssse3()
    104  const __m128i B1 = _mm_alignr_epi8(AR, A1, 2);  in vpx_highbd_d45_predictor_16x16_ssse3()
    105  const __m128i C0 = _mm_alignr_epi8(A1, A0, 4);  in vpx_highbd_d45_predictor_16x16_ssse3()
    106  const __m128i C1 = _mm_alignr_epi8(AR, A1, 4);  in vpx_highbd_d45_predictor_16x16_ssse3()
    140  const __m128i B0 = _mm_alignr_epi8(A1, A0, 2);  in vpx_highbd_d45_predictor_32x32_ssse3()
    [all …]

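Every hit above is one idiom: with 16-bit pixels, a byte shift of 2 advances a predictor row by one sample, dropping the leftmost pixel and pulling the next above-right sample in at the top. A minimal sketch of the d45_store_8() step, with hypothetical names:

    #include <tmmintrin.h>  /* SSSE3 */

    /* Concatenate above_right:row (256 bits) and shift right by 2 bytes:
       pixel 0 of 'row' falls off, pixel 0 of 'above_right' enters at the
       top. Names are illustrative, not the file's. */
    static __m128i d45_next_row(__m128i row, __m128i above_right) {
      return _mm_alignr_epi8(above_right, row, 2);
    }
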
/external/XNNPACK/src/f32-dwconv2d-chw/gen/

D | 3x3p1-minmax-ssse3-2x4.c
    100  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    101  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    102  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    103  …const __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    117  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    118  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    119  …const __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    120  …const __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    162  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    163  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    [all …]

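All of the dwconv2d tilings below repeat one trick: SSSE3 has no float PALIGNR, so each row is bit-cast to __m128i, aligned, and cast back. The viewer truncates the second operand and the byte shift, but the variable names pin them down, and the intact vzero lines in the 1x4 variants below show the shift-of-4 case at the right edge. A sketch of the presumed x3456 construction (the shift of 12 is my reconstruction, not visible in the listing):

    #include <tmmintrin.h>

    /* Given float elements {0,1,2,3} and {4,5,6,7}, return {3,4,5,6}.
       The casts are free; only the PALIGNR instruction is emitted. */
    static __m128 window_3456(__m128 v0123, __m128 v4567) {
      return _mm_castsi128_ps(_mm_alignr_epi8(
          _mm_castps_si128(v4567), _mm_castps_si128(v0123), 12));
    }

At the right edge the same call with a zero vector as the high operand and a shift of 4 (the vzero lines below) shifts zeros in instead of real pixels.
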
D | 3x3p1-minmax-ssse3-6x4.c
    156  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    157  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    158  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    159  …const __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    160  …const __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    161  …const __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    162  …const __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    163  …const __m128 vi7x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi7x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    193  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    194  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    [all …]

D | 3x3p1-minmax-ssse3-1x4.c
     86  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
     87  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
     88  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
     98  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
     99  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    100  …const __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    130  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    131  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    132  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    139  … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    [all …]

D | 3x3p1-minmax-ssse3-4x4.c
    128  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    129  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    130  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    131  …const __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    132  …const __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    133  …const __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    155  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    156  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    157  …const __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    158  …const __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    [all …]

D | 3x3p1-minmax-ssse3-3x4.c
    114  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    115  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    116  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    117  …const __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    118  …const __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    136  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    137  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    138  …const __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    139  …const __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    140  …const __m128 vi4x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    [all …]

D | 3x3p1-minmax-ssse3-5x4.c
    142  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    143  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    144  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    145  …const __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    146  …const __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    147  …const __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    148  …const __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    174  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    175  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    176  …const __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    [all …]

D | 3x3p1-minmax-ssse3-1x4-acc2.c
     86  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
     87  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
     88  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
     98  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
     99  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    100  …const __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    131  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    132  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    133  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    140  … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    [all …]

D | 3x3p1-minmax-ssse3-1x4-acc3.c
     86  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
     87  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
     88  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
     98  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
     99  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    100  …const __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    132  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    133  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    134  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    141  … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    [all …]

D | 3x3p1-minmax-ssse3-1x4-acc4.c
     86  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
     87  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
     88  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
     98  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
     99  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    100  …const __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    133  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    134  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    135  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    142  … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    [all …]

D | 3x3p1-minmax-ssse3-2x4-acc2.c
    100  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    101  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    102  …const __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    103  …const __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    117  …const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    118  …const __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    119  …const __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    120  …const __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    164  …const __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    165  …const __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si…  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    [all …]

/external/clang/test/CodeGen/

D | palignr.c
      4  #define _mm_alignr_epi8(a, b, n) (__builtin_ia32_palignr128((a), (b), (n)))  macro
      8  int4 align1(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 15); }  in align1()
     12  int4 align2(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 16); }  in align2()
     14  int4 align3(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 17); }  in align3()
     16  int4 align4(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 32); }  in align4()

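The immediates 15/16/17/32 probe the degenerate cases of the builtin. A minimal scalar model of what PALIGNR computes makes them concrete:

    #include <stdint.h>
    #include <string.h>

    /* The 32-byte value a:b (a high, b low) is shifted right by n bytes
       and the low 16 bytes are kept; bytes shifted in from above are 0. */
    static void palignr_ref(uint8_t dst[16], const uint8_t a[16],
                            const uint8_t b[16], unsigned n) {
      uint8_t cat[32];
      memcpy(cat, b, 16);       /* b fills the low 16 bytes  */
      memcpy(cat + 16, a, 16);  /* a fills the high 16 bytes */
      for (unsigned i = 0; i < 16; ++i)
        dst[i] = (n + i < 32) ? cat[n + i] : 0;
    }

So align2 (n = 16) returns a unchanged, align3 (n = 17) degenerates to a plain one-byte right shift of a, and align4 (n = 32) is all zeros, which is what these codegen tests pin down.
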
/external/llvm-project/clang/test/CodeGen/

D | palignr.c
      4  #define _mm_alignr_epi8(a, b, n) (__builtin_ia32_palignr128((a), (b), (n)))  macro
      8  int4 align1(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 15); }  in align1()
     12  int4 align2(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 16); }  in align2()
     14  int4 align3(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 17); }  in align3()
     16  int4 align4(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 32); }  in align4()

/external/python/cpython3/Modules/_blake2/impl/

D | blake2b-load-sse41.h
     61  b1 = _mm_alignr_epi8(m3, m7, 8); \
     84  b0 = _mm_alignr_epi8(m6, m5, 8); \
    109  b1 = _mm_alignr_epi8(m2, m0, 8); \
    172  b0 = _mm_alignr_epi8(m6, m0, 8); \
    221  b1 = _mm_alignr_epi8(m5, m6, 8); \
    252  b0 = _mm_alignr_epi8(m7, m5, 8); \
    277  b1 = _mm_alignr_epi8(m0, m5, 8); \
    285  b1 = _mm_alignr_epi8(m4, m1, 8); \
    293  b1 = _mm_alignr_epi8(m5, m0, 8); \
    332  b0 = _mm_alignr_epi8(m7, m5, 8); \
    [all …]

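With a byte shift of 8, PALIGNR acts as a 64-bit gather across two registers, which is how these load macros assemble each round's pair of BLAKE2b message words without extra shuffles. A sketch:

    #include <tmmintrin.h>

    /* Result = { low qword of result: high qword of 'lo',
                  high qword of result: low qword of 'hi' },
       i.e. two 64-bit words pulled from different registers in one op. */
    static __m128i gather_qwords(__m128i hi, __m128i lo) {
      return _mm_alignr_epi8(hi, lo, 8);
    }
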
D | blake2b-round.h
     87  t0 = _mm_alignr_epi8(row2h, row2l, 8); \
     88  t1 = _mm_alignr_epi8(row2l, row2h, 8); \
     96  t0 = _mm_alignr_epi8(row4h, row4l, 8); \
     97  t1 = _mm_alignr_epi8(row4l, row4h, 8); \
    102  t0 = _mm_alignr_epi8(row2l, row2h, 8); \
    103  t1 = _mm_alignr_epi8(row2h, row2l, 8); \
    111  t0 = _mm_alignr_epi8(row4l, row4h, 8); \
    112  t1 = _mm_alignr_epi8(row4h, row4l, 8); \

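The round macros use the same shift-by-8 to rotate a logical 256-bit row (row2l:row2h, row4l:row4h) by one 64-bit lane when diagonalizing; the lines with swapped operands rotate back. A sketch, assuming 'l' holds lanes 0-1 and 'h' lanes 2-3:

    #include <tmmintrin.h>

    /* Rotate the four 64-bit lanes of the pair left by one lane,
       mirroring the t0/t1 pairs in the hits above. */
    static void rotate_row_one_lane(__m128i *l, __m128i *h) {
      const __m128i t0 = _mm_alignr_epi8(*h, *l, 8); /* { lane1, lane2 } */
      const __m128i t1 = _mm_alignr_epi8(*l, *h, 8); /* { lane3, lane0 } */
      *l = t0;
      *h = t1;
    }
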
/external/libhevc/common/x86/

D | ihevc_sao_ssse3_intr.c
    957  left_store_16x8b = _mm_alignr_epi8(left_store_16x8b, left_store_16x8b, 2);  in ihevc_sao_edge_offset_class0_ssse3()
    959  left1_16x8b = _mm_alignr_epi8(src_temp1_16x8b, left_store_16x8b, 15);  in ihevc_sao_edge_offset_class0_ssse3()
    960  left_store_16x8b = _mm_alignr_epi8(left_store_16x8b, src_temp1_16x8b, 15);  in ihevc_sao_edge_offset_class0_ssse3()
    962  left0_16x8b = _mm_alignr_epi8(src_temp0_16x8b, left_store_16x8b, 15);  in ihevc_sao_edge_offset_class0_ssse3()
    963  left_store_16x8b = _mm_alignr_epi8(left_store_16x8b, src_temp0_16x8b, 15);  in ihevc_sao_edge_offset_class0_ssse3()
   1078  left_store_16x8b = _mm_alignr_epi8(left_store_16x8b, left_store_16x8b, 4);  in ihevc_sao_edge_offset_class0_ssse3()
   1081  cmp_lt1_16x8b = _mm_alignr_epi8(cmp_gt1_16x8b, left_store_16x8b, 15);  in ihevc_sao_edge_offset_class0_ssse3()
   1082  left_store_16x8b = _mm_alignr_epi8(left_store_16x8b, edge0_16x8b, 15);  in ihevc_sao_edge_offset_class0_ssse3()
   1085  left1_16x8b = _mm_alignr_epi8(src_temp1_16x8b, left_store_16x8b, 15);  in ihevc_sao_edge_offset_class0_ssse3()
   1086  left_store_16x8b = _mm_alignr_epi8(left_store_16x8b, edge0_16x8b, 15);  in ihevc_sao_edge_offset_class0_ssse3()
    [all …]

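The class0 loop appears to keep the previous column's pixels in left_store and use a byte shift of 15 to build each row's left-neighbour vector: the saved pixel enters at position 0 and the row moves right by one byte; the paired calls with the operands reversed then rotate the row's own last pixel into left_store for the next row. A sketch with hypothetical names:

    #include <tmmintrin.h>

    /* Byte 15 of 'left_store' holds the pixel to the left of the row;
       the result is 'row' shifted right by one pixel with that pixel
       inserted at byte 0. */
    static __m128i left_neighbours(__m128i row, __m128i left_store) {
      return _mm_alignr_epi8(row, left_store, 15);
    }
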
/external/libaom/libaom/av1/common/x86/

D | intra_edge_sse4.c
     71  in0 = _mm_alignr_epi8(in1, in0, 8);  in av1_filter_intra_edge_sse4_1()
    108  in0 = _mm_alignr_epi8(in1, in0, 8);  in av1_filter_intra_edge_sse4_1()
    149  __m128i in1 = _mm_alignr_epi8(in8, in0, 2);  in av1_filter_intra_edge_high_sse4_1()
    150  __m128i in2 = _mm_alignr_epi8(in8, in0, 4);  in av1_filter_intra_edge_high_sse4_1()
    178  __m128i in1 = _mm_alignr_epi8(in8, in0, 2);  in av1_filter_intra_edge_high_sse4_1()
    179  __m128i in2 = _mm_alignr_epi8(in8, in0, 4);  in av1_filter_intra_edge_high_sse4_1()
    180  __m128i in3 = _mm_alignr_epi8(in8, in0, 6);  in av1_filter_intra_edge_high_sse4_1()
    181  __m128i in4 = _mm_alignr_epi8(in8, in0, 8);  in av1_filter_intra_edge_high_sse4_1()
    238  __m128i in8 = _mm_alignr_epi8(in16, in0, 8);  in av1_upsample_intra_edge_sse4_1()
    255  __m128i in1 = _mm_alignr_epi8(in16, in0, 1);  in av1_upsample_intra_edge_sse4_1()
    [all …]

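The high-bitdepth edge filter builds its taps by sliding a 16-bit window: with samples 0..7 in in0 and 8..15 in in8, byte shifts of 2, 4, 6, 8 produce the +1 through +4 windows seen above. A sketch of a 3-tap variant; the 1-2-1 weights here are placeholders, not necessarily the library's kernel:

    #include <smmintrin.h>  /* SSE4.1 */

    /* output[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2 for i = 0..7 */
    static __m128i smooth3(__m128i in0, __m128i in8) {
      const __m128i in1 = _mm_alignr_epi8(in8, in0, 2); /* samples 1..8 */
      const __m128i in2 = _mm_alignr_epi8(in8, in0, 4); /* samples 2..9 */
      __m128i sum = _mm_add_epi16(in0, in2);
      sum = _mm_add_epi16(sum, _mm_slli_epi16(in1, 1));
      sum = _mm_add_epi16(sum, _mm_set1_epi16(2));
      return _mm_srli_epi16(sum, 2);
    }
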
D | highbd_convolve_2d_ssse3.c
     70  s[0] = _mm_alignr_epi8(row01, row00, 0);  in av1_highbd_convolve_2d_sr_ssse3()
     71  s[1] = _mm_alignr_epi8(row01, row00, 4);  in av1_highbd_convolve_2d_sr_ssse3()
     72  s[2] = _mm_alignr_epi8(row01, row00, 8);  in av1_highbd_convolve_2d_sr_ssse3()
     73  s[3] = _mm_alignr_epi8(row01, row00, 12);  in av1_highbd_convolve_2d_sr_ssse3()
     80  s[0] = _mm_alignr_epi8(row01, row00, 2);  in av1_highbd_convolve_2d_sr_ssse3()
     81  s[1] = _mm_alignr_epi8(row01, row00, 6);  in av1_highbd_convolve_2d_sr_ssse3()
     82  s[2] = _mm_alignr_epi8(row01, row00, 10);  in av1_highbd_convolve_2d_sr_ssse3()
     83  s[3] = _mm_alignr_epi8(row01, row00, 14);  in av1_highbd_convolve_2d_sr_ssse3()

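These eight alignments are the setup for a horizontal 8-tap filter over 16-bit pixels: offsets 0/4/8/12 give the even-phase windows and 2/6/10/14 the odd phase, and each window is fed to _mm_madd_epi16 against a duplicated coefficient pair. The wiener, jnt and aom_dsp convolve hits below follow the same pattern. A sketch of the even phase, assuming coeff_01 = {k0,k1,k0,k1,...} and so on:

    #include <tmmintrin.h>

    /* Even-phase outputs x = 0,2,4,6: each madd multiplies adjacent pixel
       pairs by a coefficient pair; the four sums cover all eight taps.
       Returns four 32-bit partial sums; rounding/shift omitted. */
    static __m128i hfilter_even(__m128i row00 /* pixels 0..7  */,
                                __m128i row01 /* pixels 8..15 */,
                                __m128i coeff_01, __m128i coeff_23,
                                __m128i coeff_45, __m128i coeff_67) {
      const __m128i s0 = _mm_alignr_epi8(row01, row00, 0);  /* 0,1,2,... */
      const __m128i s1 = _mm_alignr_epi8(row01, row00, 4);  /* 2,3,4,... */
      const __m128i s2 = _mm_alignr_epi8(row01, row00, 8);  /* 4,5,6,... */
      const __m128i s3 = _mm_alignr_epi8(row01, row00, 12); /* 6,7,8,... */
      __m128i sum = _mm_add_epi32(_mm_madd_epi16(s0, coeff_01),
                                  _mm_madd_epi16(s1, coeff_23));
      sum = _mm_add_epi32(sum, _mm_madd_epi16(s2, coeff_45));
      return _mm_add_epi32(sum, _mm_madd_epi16(s3, coeff_67));
    }
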
D | highbd_wiener_convolve_ssse3.c
     78  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);  in av1_highbd_wiener_convolve_add_src_ssse3()
     80  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);  in av1_highbd_wiener_convolve_add_src_ssse3()
     82  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);  in av1_highbd_wiener_convolve_add_src_ssse3()
     91  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);  in av1_highbd_wiener_convolve_add_src_ssse3()
     93  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);  in av1_highbd_wiener_convolve_add_src_ssse3()
     95  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);  in av1_highbd_wiener_convolve_add_src_ssse3()
     97  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);  in av1_highbd_wiener_convolve_add_src_ssse3()

D | jnt_convolve_ssse3.c
     89  const __m128i src_2 = _mm_alignr_epi8(src_hi, src_lo, 4);  in av1_dist_wtd_convolve_2d_ssse3()
     91  const __m128i src_4 = _mm_alignr_epi8(src_hi, src_lo, 8);  in av1_dist_wtd_convolve_2d_ssse3()
     93  const __m128i src_6 = _mm_alignr_epi8(src_hi, src_lo, 12);  in av1_dist_wtd_convolve_2d_ssse3()
    102  const __m128i src_1 = _mm_alignr_epi8(src_hi, src_lo, 2);  in av1_dist_wtd_convolve_2d_ssse3()
    104  const __m128i src_3 = _mm_alignr_epi8(src_hi, src_lo, 6);  in av1_dist_wtd_convolve_2d_ssse3()
    106  const __m128i src_5 = _mm_alignr_epi8(src_hi, src_lo, 10);  in av1_dist_wtd_convolve_2d_ssse3()
    108  const __m128i src_7 = _mm_alignr_epi8(src_hi, src_lo, 14);  in av1_dist_wtd_convolve_2d_ssse3()

D | highbd_jnt_convolve_sse4.c
    313  s[0] = _mm_alignr_epi8(row01, row00, 0);  in av1_highbd_dist_wtd_convolve_x_sse4_1()
    314  s[1] = _mm_alignr_epi8(row01, row00, 4);  in av1_highbd_dist_wtd_convolve_x_sse4_1()
    315  s[2] = _mm_alignr_epi8(row01, row00, 8);  in av1_highbd_dist_wtd_convolve_x_sse4_1()
    316  s[3] = _mm_alignr_epi8(row01, row00, 12);  in av1_highbd_dist_wtd_convolve_x_sse4_1()
    323  s[0] = _mm_alignr_epi8(row01, row00, 2);  in av1_highbd_dist_wtd_convolve_x_sse4_1()
    324  s[1] = _mm_alignr_epi8(row01, row00, 6);  in av1_highbd_dist_wtd_convolve_x_sse4_1()
    325  s[2] = _mm_alignr_epi8(row01, row00, 10);  in av1_highbd_dist_wtd_convolve_x_sse4_1()
    326  s[3] = _mm_alignr_epi8(row01, row00, 14);  in av1_highbd_dist_wtd_convolve_x_sse4_1()

D | highbd_convolve_2d_sse4.c
    242  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);  in av1_highbd_dist_wtd_convolve_2d_sse4_1()
    244  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);  in av1_highbd_dist_wtd_convolve_2d_sse4_1()
    246  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);  in av1_highbd_dist_wtd_convolve_2d_sse4_1()
    255  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);  in av1_highbd_dist_wtd_convolve_2d_sse4_1()
    257  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);  in av1_highbd_dist_wtd_convolve_2d_sse4_1()
    259  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);  in av1_highbd_dist_wtd_convolve_2d_sse4_1()
    261  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);  in av1_highbd_dist_wtd_convolve_2d_sse4_1()

/external/libgav1/libgav1/src/dsp/x86/

D | intra_edge_sse4.cc
     73  _mm_alignr_epi8(result_hi, _mm_slli_si128(result_lo, 10), 10);  in ComputeKernel1Store12()
    113  _mm_alignr_epi8(result_hi, _mm_slli_si128(result_lo, 10), 10);  in ComputeKernel2Store12()
    150  sum = _mm_alignr_epi8(sum_hi, _mm_slli_si128(sum, 8), 8);  in ComputeKernel3Store8()
    222  __m128i sum_lo = _mm_sub_epi16(_mm_alignr_epi8(src9_hi, src9_lo, 2), src_lo);  in IntraEdgeUpsampler_SSE4_1()
    223  sum_lo = _mm_add_epi16(sum_lo, _mm_alignr_epi8(src9_hi, src9_lo, 4));  in IntraEdgeUpsampler_SSE4_1()
    224  sum_lo = _mm_sub_epi16(sum_lo, _mm_alignr_epi8(src_hi, src_lo, 6));  in IntraEdgeUpsampler_SSE4_1()
    234  _mm_sub_epi16(_mm_alignr_epi8(src9_hi_extra, src9_hi, 2), src_hi);  in IntraEdgeUpsampler_SSE4_1()
    235  sum_hi = _mm_add_epi16(sum_hi, _mm_alignr_epi8(src9_hi_extra, src9_hi, 4));  in IntraEdgeUpsampler_SSE4_1()
    236  sum_hi = _mm_sub_epi16(sum_hi, _mm_alignr_epi8(src_hi_extra, src_hi, 6));  in IntraEdgeUpsampler_SSE4_1()

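PALIGNR always takes the *top* bytes of its low operand, so to splice the *bottom* bytes of one register onto the bottom of another, the Store12 helpers first park them at the top with _mm_slli_si128. A sketch with the shift of 10 used above:

    #include <smmintrin.h>

    /* Result = low 6 bytes of 'lo' followed by low 10 bytes of 'hi':
       slli_si128 parks lo's bytes 0..5 in bytes 10..15, then the
       shift-right-by-10 concatenation pulls them back to the bottom. */
    static __m128i splice_low_bytes(__m128i lo, __m128i hi) {
      return _mm_alignr_epi8(hi, _mm_slli_si128(lo, 10), 10);
    }
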
/external/libaom/libaom/aom_dsp/x86/

D | highbd_convolve_ssse3.c
    210  s[0] = _mm_alignr_epi8(row01, row00, 0);  in av1_highbd_convolve_x_sr_ssse3()
    211  s[1] = _mm_alignr_epi8(row01, row00, 4);  in av1_highbd_convolve_x_sr_ssse3()
    212  s[2] = _mm_alignr_epi8(row01, row00, 8);  in av1_highbd_convolve_x_sr_ssse3()
    213  s[3] = _mm_alignr_epi8(row01, row00, 12);  in av1_highbd_convolve_x_sr_ssse3()
    220  s[0] = _mm_alignr_epi8(row01, row00, 2);  in av1_highbd_convolve_x_sr_ssse3()
    221  s[1] = _mm_alignr_epi8(row01, row00, 6);  in av1_highbd_convolve_x_sr_ssse3()
    222  s[2] = _mm_alignr_epi8(row01, row00, 10);  in av1_highbd_convolve_x_sr_ssse3()
    223  s[3] = _mm_alignr_epi8(row01, row00, 14);  in av1_highbd_convolve_x_sr_ssse3()

/external/flac/src/libFLAC/

D | lpc_intrin_sse41.c
    640  dat[5] = _mm_alignr_epi8(dat[5], dat[4], 8);  // ? d[i-11] ? d[i-10]  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    641  dat[4] = _mm_alignr_epi8(dat[4], dat[3], 8);  // ? d[i-9] ? d[i-8]  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    642  dat[3] = _mm_alignr_epi8(dat[3], dat[2], 8);  // ? d[i-7] ? d[i-6]  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    643  dat[2] = _mm_alignr_epi8(dat[2], dat[1], 8);  // ? d[i-5] ? d[i-4]  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    644  dat[1] = _mm_alignr_epi8(dat[1], dat[0], 8);  // ? d[i-3] ? d[i-2]  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    645  dat[0] = _mm_alignr_epi8(dat[0], temp, 8);  // ? d[i-1] ? d[i ]  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    691  dat[4] = _mm_alignr_epi8(dat[4], dat[3], 8);  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    692  dat[3] = _mm_alignr_epi8(dat[3], dat[2], 8);  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    693  dat[2] = _mm_alignr_epi8(dat[2], dat[1], 8);  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    694  dat[1] = _mm_alignr_epi8(dat[1], dat[0], 8);  in FLAC__lpc_restore_signal_wide_intrin_sse41()
    [all …]

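The restore loop keeps its sample history spread across several registers (dat[0] newest, dat[5] oldest, per the comments) and slides the whole delay line by one 64-bit slot per output sample: updating oldest-first, each register pulls the high quadword of the next-newer one into its low half, and the freshly reconstructed sample enters from temp. A sketch of that multi-register shift register:

    #include <tmmintrin.h>

    enum { NREG = 6 }; /* matches the dat[0..5] chain above */

    static void slide_history(__m128i dat[NREG], __m128i temp) {
      /* Oldest first, so each step still sees its neighbour's old value. */
      for (int k = NREG - 1; k > 0; --k)
        dat[k] = _mm_alignr_epi8(dat[k], dat[k - 1], 8);
      /* The newest sample sits in the high quadword of 'temp'. */
      dat[0] = _mm_alignr_epi8(dat[0], temp, 8);
    }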