/external/XNNPACK/src/f32-gemm/gen/ |
D | 3x16s4-minmax-fma3-broadcast.c | 59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() local 80 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() 94 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() 108 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() 122 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() 146 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() 160 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc1, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() 174 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc2, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() 188 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc3, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() 200 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() [all …]
|
D | 4x16s4-minmax-fma3-broadcast.c | 65 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local 91 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() 108 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() 125 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() 142 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() 170 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() 187 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc1, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() 204 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc2, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() 221 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc3, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() 235 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() [all …]
|
D | 3x16-minmax-avx-broadcast.c | 59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast() local 80 vacc2x89ABCDEF = _mm256_add_ps(vacc2x89ABCDEF, _mm256_mul_ps(va2, vb89ABCDEF)); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast() 91 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast() 99 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast() 103 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast() 123 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
|
D | 3x16-minmax-fma3-broadcast.c | 59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast() local 80 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast() 91 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast() 99 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast() 103 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast() 123 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
|
D | 5x16s4-minmax-fma3-broadcast.c | 71 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local 102 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 122 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 142 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 162 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 194 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 214 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc1, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 234 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc2, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 254 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc3, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() 270 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 3x16s4inc-minmax-fma3-broadcast.c | 61 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() local 82 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() 96 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() 110 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() 124 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() 148 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() 162 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc1, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() 176 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc2, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() 190 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc3, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() 202 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() [all …]
|
D | 4x16s4inc-minmax-fma3-broadcast.c | 67 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local 93 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() 110 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() 127 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() 144 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() 172 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() 189 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc1, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() 206 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc2, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() 223 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc3, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() 237 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() [all …]
|
D | 3x16inc-minmax-fma3-broadcast.c | 61 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast() local 82 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast() 93 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast() 101 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast() 105 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast() 125 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
|
D | 3x16inc-minmax-avx-broadcast.c | 61 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast() local 82 vacc2x89ABCDEF = _mm256_add_ps(vacc2x89ABCDEF, _mm256_mul_ps(va2, vb89ABCDEF)); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast() 93 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast() 101 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast() 105 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast() 125 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
|
D | 5x16s4inc-minmax-fma3-broadcast.c | 73 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local 104 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 124 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 144 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 164 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 196 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 216 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc1, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 236 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc2, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 256 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc3, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() 272 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() [all …]
|
D | 4x16inc-minmax-fma3-broadcast.c | 67 __m256 vacc2x89ABCDEF = _mm256_load_ps(acc + 40); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast() local 93 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast() 106 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast() 116 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast() 124 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast() 147 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
|
/external/XNNPACK/src/f16-gemm/gen/ |
D | 4x16-minmax-neonfp16arith-ld64.c | 68 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local 89 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() 103 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() 116 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() 130 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() 143 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() 157 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() 170 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() 184 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() 206 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() [all …]
|
D | 6x16-minmax-neonfp16arith-ld64.c | 80 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local 109 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 129 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 146 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 166 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 183 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 203 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 220 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 240 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() 268 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() [all …]
|
D | 3x16-minmax-avx2-broadcast.c | 60 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast() local 81 …vacc2x89ABCDEF = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF),… in xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast() 92 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast() 100 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast() 110 _mm_storeu_si128((__m128i*) (c2 + 8), _mm256_cvtps_ph(vacc2x89ABCDEF, _MM_FROUND_NO_EXC)); in xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast() 129 vh2x01234567 = _mm256_cvtps_ph(vacc2x89ABCDEF, _MM_FROUND_NO_EXC); in xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast()
|
D | 8x16-minmax-neonfp16arith-ld64.c | 92 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local 129 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 155 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 176 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 202 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 223 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 249 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 270 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 296 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 330 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 3x16s4-minmax-fma3-broadcast.c | 59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() local 99 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() 113 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() 127 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() 141 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() 165 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() 179 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc1, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() 193 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc2, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() 207 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc3, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() 221 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() [all …]
|
D | 4x16s4-minmax-fma3-broadcast.c | 63 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local 113 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() 130 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() 147 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() 164 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() 192 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() 209 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc1, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() 226 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc2, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() 243 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc3, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() 259 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() [all …]
|
D | 5x16s4-minmax-fma3-broadcast.c | 67 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local 127 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 147 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc1, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 167 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc2, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 187 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc3, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 219 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 239 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc1, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 259 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc2, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 279 …vacc2x89ABCDEF = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc3, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() 297 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() [all …]
|
D | 3x16-minmax-fma3-broadcast.c | 59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast() local 99 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEF, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast() 111 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast() 119 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast() 123 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast() 140 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
|
D | 3x16-minmax-avx-broadcast.c | 59 __m256 vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast() local 99 vacc2x89ABCDEF = _mm256_add_ps(vacc2x89ABCDEF, _mm256_mul_ps(va2, vb89ABCDEF)); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast() 111 vacc2x89ABCDEF = _mm256_max_ps(vacc2x89ABCDEF, vmin); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast() 119 vacc2x89ABCDEF = _mm256_min_ps(vacc2x89ABCDEF, vmax); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast() 123 _mm256_storeu_ps(c2 + 8, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast() 140 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
|
/external/XNNPACK/src/f16-gemm/gen-inc/ |
D | 4x16inc-minmax-neonfp16arith-ld64.c | 70 …float16x8_t vacc2x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local 91 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 105 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 118 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 132 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 145 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 159 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 172 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 186 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() 208 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() [all …]
|
D | 6x16inc-minmax-neonfp16arith-ld64.c | 82 …float16x8_t vacc2x89ABCDEF = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local 111 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 131 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 148 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 168 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 185 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 205 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 222 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 242 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() 270 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f16-igemm/gen/ |
D | 4x16-minmax-neonfp16arith-ld64.c | 64 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local 109 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() 123 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() 136 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() 150 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() 163 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() 177 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() 190 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() 204 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() 224 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() [all …]
|
D | 6x16-minmax-neonfp16arith-ld64.c | 72 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local 135 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 155 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 172 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 192 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 209 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 229 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 246 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 266 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() 292 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() [all …]
|
D | 8x16-minmax-neonfp16arith-ld64.c | 80 float16x8_t vacc2x89ABCDEF = vacc0x89ABCDEF; in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local 161 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 187 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 208 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc1, va2, 1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 234 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c1, vb89ABCDEFc1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 255 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc2, va2, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 281 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c2, vb89ABCDEFc2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 302 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc3, va2, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 328 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c3, vb89ABCDEFc3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 360 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2, vb89ABCDEF); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|