/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-ssse3-2x4.c | 100 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() 101 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() 102 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() 103 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() 117 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() 118 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() 119 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() 120 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() 162 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() 163 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4() [all …]
|
D | 3x3p1-minmax-ssse3-6x4.c | 156 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 157 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 158 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 159 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 160 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 161 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 162 …st __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 163 …st __m128 vi7x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi7x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 193 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 194 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() [all …]
|
D | 3x3p1-minmax-ssse3-1x4.c | 86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 130 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 131 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 132 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 139 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() [all …]
|
D | 3x3p1-minmax-ssse3-4x4.c | 128 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 129 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 130 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 131 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 132 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 133 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 155 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 156 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 157 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 158 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() [all …]
|
D | 3x3p1-minmax-ssse3-3x4.c | 114 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() 115 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() 116 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() 117 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() 118 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() 136 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() 137 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() 138 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() 139 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() 140 …st __m128 vi4x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() [all …]
|
D | 3x3p1-minmax-ssse3-5x4.c | 142 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 143 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 144 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 145 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 146 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 147 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 148 …st __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 174 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 175 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 176 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() [all …]
|
D | 3x3p1-minmax-ssse3-1x4-acc2.c | 86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 131 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 132 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 133 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() 140 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() [all …]
|
D | 3x3p1-minmax-ssse3-1x4-acc3.c | 86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 132 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 133 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 134 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() 141 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() [all …]
|
D | 3x3p1-minmax-ssse3-1x4-acc4.c | 86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() 87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() 88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() 98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() 99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() 100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() 133 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() 134 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() 135 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() 142 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4() [all …]
|
D | 3x3p1-minmax-ssse3-2x4-acc2.c | 100 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() 101 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() 102 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() 103 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() 117 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() 118 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() 119 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() 120 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() 164 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() 165 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2() [all …]
|
/external/XNNPACK/src/f32-argmaxpool/ |
D | 9p8x-sse2-c4.c | 78 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 82 const __m128i vm2 = _mm_castps_si128(_mm_cmpgt_ps(vi2, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 86 const __m128i vm3 = _mm_castps_si128(_mm_cmpgt_ps(vi3, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 90 const __m128i vm4 = _mm_castps_si128(_mm_cmpgt_ps(vi4, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 94 const __m128i vm5 = _mm_castps_si128(_mm_cmpgt_ps(vi5, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 98 const __m128i vm6 = _mm_castps_si128(_mm_cmpgt_ps(vi6, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 102 const __m128i vm7 = _mm_castps_si128(_mm_cmpgt_ps(vi7, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 106 const __m128i vm8 = _mm_castps_si128(_mm_cmpgt_ps(vi8, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 163 const __m128i vm0 = _mm_castps_si128(_mm_cmpgt_ps(vi0, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 167 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() [all …]
|
D | 9x-sse2-c4.c | 97 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 101 const __m128i vm2 = _mm_castps_si128(_mm_cmpgt_ps(vi2, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 105 const __m128i vm3 = _mm_castps_si128(_mm_cmpgt_ps(vi3, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 109 const __m128i vm4 = _mm_castps_si128(_mm_cmpgt_ps(vi4, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 113 const __m128i vm5 = _mm_castps_si128(_mm_cmpgt_ps(vi5, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 117 const __m128i vm6 = _mm_castps_si128(_mm_cmpgt_ps(vi6, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 121 const __m128i vm7 = _mm_castps_si128(_mm_cmpgt_ps(vi7, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 125 const __m128i vm8 = _mm_castps_si128(_mm_cmpgt_ps(vi8, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 148 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 152 const __m128i vm2 = _mm_castps_si128(_mm_cmpgt_ps(vi2, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() [all …]
|
/external/XNNPACK/src/x8-zip/ |
D | x3-sse2.c | 53 const __m128i vtemp0 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() 56 const __m128i vtemp1 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() 59 const __m128i vtemp2 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() 63 const __m128i vxyz0 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() 66 const __m128i vxyz1 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() 69 const __m128i vxyz2 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() 102 const __m128i vtemp0 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() 105 const __m128i vtemp1 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() 108 const __m128i vtemp2 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() 112 const __m128i vxyz0 = _mm_castps_si128( in xnn_x8_zip_x3_ukernel__sse2() [all …]
|
/external/webrtc/common_audio/third_party/ooura/fft_size_128/ |
D | ooura_fft_sse2.cc | 30 static __inline __m128i _mm_castps_si128(__m128 a) { in _mm_castps_si128() function 137 _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); in cftmdl_128_SSE2() 150 _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0)); in cftmdl_128_SSE2() 153 _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2))); in cftmdl_128_SSE2() 155 _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1)); in cftmdl_128_SSE2() 158 _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3))); in cftmdl_128_SSE2() 161 _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add)); in cftmdl_128_SSE2() 162 _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub)); in cftmdl_128_SSE2() 164 _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4)); in cftmdl_128_SSE2() 167 _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3))); in cftmdl_128_SSE2() [all …]
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx-rr2-p5-div-x80.c | 76 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() 77 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() 79 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() 80 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() 82 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() 83 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() 85 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() 86 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() 88 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() 89 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80() [all …]
|
D | avx-rr2-p5-div-x72.c | 73 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() 74 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() 76 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() 77 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() 79 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() 80 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() 82 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() 83 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() 85 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() 86 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72() [all …]
|
D | avx-rr2-p5-div-x64.c | 70 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() 71 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() 73 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() 74 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() 76 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() 77 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() 79 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() 80 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() 82 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() 83 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() [all …]
|
D | avx-rr2-p5-div-x56.c | 67 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() 68 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() 70 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() 71 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() 73 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() 74 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() 76 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() 77 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() 79 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() 80 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() [all …]
|
D | sse2-p5-div-x24.c | 61 const __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 62 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 63 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 64 const __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 65 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 66 const __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 152 …const __m128 vm0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx012… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 153 …const __m128 vm4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx456… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 154 …const __m128 vm89AB = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx89A… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 155 …const __m128 vmCDEF = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxCDE… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() [all …]
|
D | sse2-p5-div-x20.c | 58 const __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 59 const __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 60 const __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 61 const __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 62 const __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 136 …const __m128 vm0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx012… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 137 …const __m128 vm4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx456… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 138 …const __m128 vm89AB = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx89A… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 139 …const __m128 vmCDEF = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxCDE… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 140 …const __m128 vmGHIJ = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxGHI… in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() [all …]
|
D | avx-rr2-p5-div-x48.c | 64 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 65 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 67 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 68 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 70 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 71 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 73 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 74 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 76 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 77 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() [all …]
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | sse2-2x8.c | 59 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8() 61 …8 vmask0x4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x4567))); in xnn_f32_prelu_ukernel__sse2_2x8() 63 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8() 65 …8 vmask1x4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x4567))); in xnn_f32_prelu_ukernel__sse2_2x8() 89 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8() 91 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8() 111 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8() 113 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-sse2-rr2-p6-x24.c | 68 __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 69 __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 70 __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 71 __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 72 __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 73 __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 158 …const __m128 vm0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx012… in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 160 …const __m128 vm4567 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx456… in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 162 …const __m128 vm89AB = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx89A… in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 164 …const __m128 vmCDEF = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxCDE… in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 5x8-minmax-sse2-dup.c | 92 …const __m128 va0c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 93 …const __m128 va1c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 94 …const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 95 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 96 …const __m128 va4c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va4), _MM_SHUFFLE(0, 0… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 112 …const __m128 va0c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 113 …const __m128 va1c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 114 …const __m128 va2c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 115 …const __m128 va3c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() 116 …const __m128 va4c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va4), _MM_SHUFFLE(1, 1… in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 5x8inc-minmax-sse2-dup.c | 94 …const __m128 va0c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 95 …const __m128 va1c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 96 …const __m128 va2c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 97 …const __m128 va3c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 98 …const __m128 va4c0000 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va4), _MM_SHUFFLE(0, 0… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 114 …const __m128 va0c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 115 …const __m128 va1c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va1), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 116 …const __m128 va2c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va2), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 117 …const __m128 va3c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va3), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() 118 …const __m128 va4c1111 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va4), _MM_SHUFFLE(1, 1… in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup() [all …]
|