/external/XNNPACK/src/f32-gemm/gen-inc/

D | 1x8inc-minmax-sse-dup.c | in xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup():
    70  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
    75  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
    76  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 1x8inc-minmax-sse2-dup.c | in xnn_f32_gemminc_minmax_ukernel_1x8__sse2_dup():
    70  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
    75  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
    76  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 3x8inc-minmax-sse-dup.c | in xnn_f32_gemminc_minmax_ukernel_3x8__sse_dup():
   102  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
   109  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   112  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 3x8inc-minmax-sse2-dup.c | in xnn_f32_gemminc_minmax_ukernel_3x8__sse2_dup():
   102  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
   109  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   112  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 4x8inc-minmax-sse-dup.c | in xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup():
   118  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
   126  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   130  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 4x8inc-minmax-sse2-dup.c | in xnn_f32_gemminc_minmax_ukernel_4x8__sse2_dup():
   118  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
   126  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   130  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 5x8inc-minmax-sse2-dup.c | in xnn_f32_gemminc_minmax_ukernel_5x8__sse2_dup():
   134  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
   143  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   148  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 5x8inc-minmax-sse-dup.c | in xnn_f32_gemminc_minmax_ukernel_5x8__sse_dup():
   134  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
   143  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   148  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

/external/XNNPACK/src/f32-gemm/gen/

D | 1x8-minmax-sse-dup.c | in xnn_f32_gemm_minmax_ukernel_1x8__sse_dup():
    68  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
    73  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
    74  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 1x8-minmax-sse2-dup.c | in xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup():
    68  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
    73  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
    74  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 3x8-minmax-sse-dup.c | in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup():
   100  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
   107  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   110  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 3x8-minmax-sse2-dup.c | in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup():
   100  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
   107  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   110  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 4x8-minmax-sse2-dup.c | in xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup():
   116  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
   124  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   128  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 4x8-minmax-sse-dup.c | in xnn_f32_gemm_minmax_ukernel_4x8__sse_dup():
   116  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
   124  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   128  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 5x8-minmax-sse-dup.c | in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup():
   132  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
   141  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   146  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 5x8-minmax-sse2-dup.c | in xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup():
   132  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
   141  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   146  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

/external/XNNPACK/src/f32-igemm/gen/

D | 1x8-minmax-sse-dup.c | in xnn_f32_igemm_minmax_ukernel_1x8__sse_dup():
    81  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
    86  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
    87  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 1x8-minmax-sse2-dup.c | in xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup():
    81  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
    86  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
    87  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 3x8-minmax-sse-dup.c | in xnn_f32_igemm_minmax_ukernel_3x8__sse_dup():
   119  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
   126  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   129  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 3x8-minmax-sse2-dup.c | in xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup():
   119  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
   126  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   129  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 4x8-minmax-sse-dup.c | in xnn_f32_igemm_minmax_ukernel_4x8__sse_dup():
   138  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
   146  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   150  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 4x8-minmax-sse2-dup.c | in xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup():
   138  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
   146  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   150  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 5x8-minmax-sse-dup.c | in xnn_f32_igemm_minmax_ukernel_5x8__sse_dup():
   157  const __m128 va0c2222 = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));  (local)
   166  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   171  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));

D | 5x8-minmax-sse2-dup.c | in xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup():
   157  const __m128 va0c2222 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));  (local)
   166  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0c2222, vb0123c2));
   171  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));
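
Every hit above is the same "dup" microkernel idiom: broadcast lane 2 of the
activation register va0 into all four lanes ("va0c2222"), then multiply it
against two packed-weight registers and accumulate. The *-sse-dup.c files do
the broadcast with SHUFPS; the *-sse2-dup.c files bit-cast to the integer
domain and use PSHUFD instead. The standalone sketch below is not XNNPACK
source; the main() scaffold and the va0/vb0123c2 values are illustrative
assumptions. It shows that the two forms produce the same vector:

    /* cc -msse2 dup.c && ./a.out */
    #include <stdio.h>
    #include <emmintrin.h>  /* SSE2 intrinsics; also provides the SSE set */

    int main(void) {
      /* _mm_set_ps takes lanes high-to-low, so lanes 0..3 hold 0,1,2,3. */
      const __m128 va0 = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);

      /* SSE variant: shuffle the float register against itself (SHUFPS). */
      const __m128 dup_sse = _mm_shuffle_ps(va0, va0, _MM_SHUFFLE(2, 2, 2, 2));

      /* SSE2 variant: cast to the integer domain, PSHUFD, cast back. PSHUFD
       * writes a fresh destination register, so unlike the two-operand
       * SHUFPS it never clobbers va0 and needs no extra MOVAPS copy. */
      const __m128 dup_sse2 = _mm_castsi128_ps(
          _mm_shuffle_epi32(_mm_castps_si128(va0), _MM_SHUFFLE(2, 2, 2, 2)));

      /* The accumulate step from the second and third hit in each file,
       * with made-up weights. */
      const __m128 vb0123c2 = _mm_set1_ps(0.5f);
      __m128 vacc0x0123 = _mm_setzero_ps();
      vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(dup_sse, vb0123c2));

      float out[12];
      _mm_storeu_ps(out, dup_sse);
      _mm_storeu_ps(out + 4, dup_sse2);
      _mm_storeu_ps(out + 8, vacc0x0123);
      printf("sse:  %g %g %g %g\n", out[0], out[1], out[2], out[3]);   /* 2 2 2 2 */
      printf("sse2: %g %g %g %g\n", out[4], out[5], out[6], out[7]);   /* 2 2 2 2 */
      printf("acc:  %g %g %g %g\n", out[8], out[9], out[10], out[11]); /* 1 1 1 1 */
      return 0;
    }

Which variant is faster depends on the target's shuffle ports and any
FP/integer bypass delay around PSHUFD, which is presumably why XNNPACK
generates and benchmarks both forms.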