/external/XNNPACK/src/f16-gemm/gen/ |
D | 8x8-minmax-neonfp16arith-ld64.c | 93 float16x8_t vacc6x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() local 116 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 134 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 146 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c1, va6, 1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 164 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 176 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 194 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 206 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 224 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 249 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6, vb01234567); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() [all …]
|
D | 8x16-minmax-neonfp16arith-ld64.c | 99 float16x8_t vacc6x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local 125 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 151 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 172 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c1, va6, 1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 198 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 219 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 245 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 266 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 292 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 326 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6, vb01234567); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f16-igemm/gen/ |
D | 8x8-minmax-neonfp16arith-ld64.c | 81 float16x8_t vacc6x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() local 148 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 166 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 178 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c1, va6, 1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 196 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 208 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 226 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 238 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 256 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 279 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6, vb01234567); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() [all …]
|
D | 8x16-minmax-neonfp16arith-ld64.c | 87 float16x8_t vacc6x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local 157 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 183 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 204 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c1, va6, 1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 230 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 251 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 277 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 298 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 324 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 356 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6, vb01234567); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f16-gemm/gen-inc/ |
D | 8x8inc-minmax-neonfp16arith-ld64.c | 95 …float16x8_t vacc6x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() local 118 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 136 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 148 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c1, va6, 1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 166 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 178 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 196 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 208 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 226 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 251 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6, vb01234567); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() [all …]
|
D | 8x16inc-minmax-neonfp16arith-ld64.c | 101 …float16x8_t vacc6x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local 127 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 153 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 174 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c1, va6, 1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 200 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 221 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c2, va6, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 247 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 268 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 294 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 328 vacc6x01234567 = vfmaq_f16(vacc6x01234567, va6, vb01234567); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 7x8inc-minmax-avx-broadcast.c | 86 __m256 vacc6x01234567 = _mm256_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast() local 115 vacc6x01234567 = _mm256_add_ps(vacc6x01234567, _mm256_mul_ps(va6, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast() 127 vacc6x01234567 = _mm256_min_ps(vacc6x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast() 136 vacc6x01234567 = _mm256_max_ps(vacc6x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast() 139 _mm256_storeu_ps(c6, vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast() 164 __m128 vacc6x0123 = _mm256_castps256_ps128(vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast() 180 vacc6x0123 = _mm256_extractf128_ps(vacc6x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast()
|
D | 7x8inc-minmax-fma3-broadcast.c | 86 __m256 vacc6x01234567 = _mm256_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast() local 115 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast() 127 vacc6x01234567 = _mm256_min_ps(vacc6x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast() 136 vacc6x01234567 = _mm256_max_ps(vacc6x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast() 139 _mm256_storeu_ps(c6, vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast() 164 __m128 vacc6x0123 = _mm256_castps256_ps128(vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast() 180 vacc6x0123 = _mm256_extractf128_ps(vacc6x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast()
|
D | 8x8inc-minmax-fma3-broadcast.c | 92 __m256 vacc6x01234567 = _mm256_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() local 124 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 137 vacc6x01234567 = _mm256_min_ps(vacc6x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 147 vacc6x01234567 = _mm256_max_ps(vacc6x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 153 _mm256_storeu_ps(c6, vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 180 __m128 vacc6x0123 = _mm256_castps256_ps128(vacc6x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 198 vacc6x0123 = _mm256_extractf128_ps(vacc6x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 7x8-minmax-fma3-broadcast.c | 84 __m256 vacc6x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast() local 113 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast() 125 vacc6x01234567 = _mm256_min_ps(vacc6x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast() 134 vacc6x01234567 = _mm256_max_ps(vacc6x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast() 137 _mm256_storeu_ps(c6, vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast() 162 __m128 vacc6x0123 = _mm256_castps256_ps128(vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast() 178 vacc6x0123 = _mm256_extractf128_ps(vacc6x01234567, 1); in xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast()
|
D | 7x8-minmax-avx-broadcast.c | 84 __m256 vacc6x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast() local 113 vacc6x01234567 = _mm256_add_ps(vacc6x01234567, _mm256_mul_ps(va6, vb01234567)); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast() 125 vacc6x01234567 = _mm256_min_ps(vacc6x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast() 134 vacc6x01234567 = _mm256_max_ps(vacc6x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast() 137 _mm256_storeu_ps(c6, vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast() 162 __m128 vacc6x0123 = _mm256_castps256_ps128(vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast() 178 vacc6x0123 = _mm256_extractf128_ps(vacc6x01234567, 1); in xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast()
|
D | 8x8-minmax-fma3-broadcast.c | 90 __m256 vacc6x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() local 122 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 135 vacc6x01234567 = _mm256_min_ps(vacc6x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 145 vacc6x01234567 = _mm256_max_ps(vacc6x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 151 _mm256_storeu_ps(c6, vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 178 __m128 vacc6x0123 = _mm256_castps256_ps128(vacc6x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 196 vacc6x0123 = _mm256_extractf128_ps(vacc6x01234567, 1); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 7x8-minmax-fma3-broadcast.c | 76 __m256 vacc6x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast() local 144 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast() 157 vacc6x01234567 = _mm256_min_ps(vacc6x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast() 166 vacc6x01234567 = _mm256_max_ps(vacc6x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast() 169 _mm256_storeu_ps(c6, vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast() 187 __m128 vacc6x0123 = _mm256_castps256_ps128(vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast() 203 vacc6x0123 = _mm256_extractf128_ps(vacc6x01234567, 1); in xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast()
|
D | 7x8-minmax-avx-broadcast.c | 76 __m256 vacc6x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast() local 144 vacc6x01234567 = _mm256_add_ps(vacc6x01234567, _mm256_mul_ps(va6, vb01234567)); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast() 157 vacc6x01234567 = _mm256_min_ps(vacc6x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast() 166 vacc6x01234567 = _mm256_max_ps(vacc6x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast() 169 _mm256_storeu_ps(c6, vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast() 187 __m128 vacc6x0123 = _mm256_castps256_ps128(vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast() 203 vacc6x0123 = _mm256_extractf128_ps(vacc6x01234567, 1); in xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast()
|
D | 8x8-minmax-fma3-broadcast.c | 80 __m256 vacc6x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() local 156 vacc6x01234567 = _mm256_fmadd_ps(va6, vb01234567, vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 170 vacc6x01234567 = _mm256_min_ps(vacc6x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 180 vacc6x01234567 = _mm256_max_ps(vacc6x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 186 _mm256_storeu_ps(c6, vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 205 __m128 vacc6x0123 = _mm256_castps256_ps128(vacc6x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 223 vacc6x0123 = _mm256_extractf128_ps(vacc6x01234567, 1); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 8x8c4-minmax-neondot.c | 259 …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local 265 … int8x16_t vout6x01234567_7x01234567 = vqmovn_high_s16(vqmovn_s16(vacc6x01234567), vacc7x01234567); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 273 …const int16x8_t vacc6x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc6x0123), vqmovn_s32(vacc6x… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local 279 …int8x16_t vout6x01234567_7x01234567 = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc7x012… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 383 …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 394 int8x16_t vout6x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc6x01234567), vacc6x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 409 …const int16x8_t vacc6x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc6x0123), vqmovn_s32(vacc6x… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 420 …int8x16_t vout6x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc6x89ABCD… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 8x8c4-minmax-neondot.c | 283 …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local 289 … int8x16_t vout6x01234567_7x01234567 = vqmovn_high_s16(vqmovn_s16(vacc6x01234567), vacc7x01234567); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 297 …const int16x8_t vacc6x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc6x0123), vqmovn_s32(vacc6x… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local 303 …int8x16_t vout6x01234567_7x01234567 = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc7x012… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 407 …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 418 int8x16_t vout6x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc6x01234567), vacc6x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 433 …const int16x8_t vacc6x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc6x0123), vqmovn_s32(vacc6x… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 444 …int8x16_t vout6x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc6x89ABCD… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
|
/external/XNNPACK/src/qu8-gemm/ |
D | 8x8-minmax-neon.c | 513 …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v… in xnn_qu8_gemm_minmax_ukernel_8x8__neon() local 519 …uint8x16_t vout6x01234567_7x01234567 = vqmovun_high_s16(vqmovun_s16(vacc6x01234567), vacc7x0123456… in xnn_qu8_gemm_minmax_ukernel_8x8__neon() 533 const int16x8_t vacc6x01234567 = in xnn_qu8_gemm_minmax_ukernel_8x8__neon() local 541 …uint8x16_t vout6x01234567_7x01234567 = vcombine_u8(vqmovun_s16(vacc6x01234567), vqmovun_s16(vacc7x… in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
|
/external/XNNPACK/src/qu8-igemm/ |
D | 8x8-minmax-neon.c | 558 …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v… in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local 564 …uint8x16_t vout6x01234567_7x01234567 = vqmovun_high_s16(vqmovun_s16(vacc6x01234567), vacc7x0123456… in xnn_qu8_igemm_minmax_ukernel_8x8__neon() 578 const int16x8_t vacc6x01234567 = in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local 586 …uint8x16_t vout6x01234567_7x01234567 = vcombine_u8(vqmovun_s16(vacc6x01234567), vqmovun_s16(vacc7x… in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
|