Home
last modified time | relevance | path

Searched refs:vb89ABCDEFc0 (Results 1 – 25 of 119) sorted by relevance

12345

/external/XNNPACK/src/f32-igemm/gen/
D5x16s4-minmax-fma3-broadcast.c118 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
125 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
126 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
127 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
128 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
129 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc0, vacc4x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
210 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
217 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
218 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
219 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
[all …]
D4x16s4-minmax-fma3-broadcast.c105 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local
111 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
112 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
113 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
114 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
184 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local
190 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
191 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
192 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
193 … = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
D3x16s4-minmax-fma3-broadcast.c92 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() local
97 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
98 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
99 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
158 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() local
163 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
164 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
165 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
D1x16s4-minmax-fma3-broadcast.c66 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast() local
69 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
106 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast() local
109 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-gemm/gen/
D5x16s4-minmax-fma3-broadcast.c93 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
100 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
101 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
102 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
103 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
104 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc0, vacc4x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
185 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
192 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
193 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
194 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
[all …]
D4x16s4-minmax-fma3-broadcast.c83 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
89 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
90 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
91 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
92 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
162 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
168 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
169 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
170 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
171 … = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
D3x16s4-minmax-fma3-broadcast.c73 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() local
78 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
79 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
80 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
139 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() local
144 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
145 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
146 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
D1x16s4-minmax-fma3-broadcast.c53 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local
56 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
93 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local
96 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-gemm/gen-inc/
D5x16s4inc-minmax-fma3-broadcast.c95 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local
102 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
103 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
104 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
105 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
106 vacc4x89ABCDEF = _mm256_fmadd_ps(va4, vb89ABCDEFc0, vacc4x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
187 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast() local
194 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
195 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
196 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast()
[all …]
D4x16s4inc-minmax-fma3-broadcast.c85 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
91 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
92 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
93 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
94 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
164 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
170 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
171 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
172 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
173 … = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
D3x16s4inc-minmax-fma3-broadcast.c75 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() local
80 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
81 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
82 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
141 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() local
146 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
147 … = _mm256_fmadd_ps(_mm256_and_ps(va1, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
148 … = _mm256_fmadd_ps(_mm256_and_ps(va2, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
D1x16s4inc-minmax-fma3-broadcast.c55 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast() local
58 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
95 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast() local
98 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/f16-gemm/gen/
D6x16-minmax-neonfp16arith-ld64.c98 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
107 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
108 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc0, va1, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
109 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
110 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
111 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
112 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc0, va5, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
127 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
128 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
129 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D8x16-minmax-neonfp16arith-ld64.c116 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
127 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
128 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc0, va1, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
129 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
130 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
131 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
132 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc0, va5, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
133 vacc6x89ABCDEF = vfmaq_lane_f16(vacc6x89ABCDEF, vb89ABCDEFc0, va6, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
134 vacc7x89ABCDEF = vfmaq_lane_f16(vacc7x89ABCDEF, vb89ABCDEFc0, va7, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
153 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c80 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
87 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
88 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc0, va1, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
89 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
90 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
101 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
102 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
103 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
104 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
D1x16-minmax-neonfp16arith-ld64.c53 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() local
57 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
62 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
/external/XNNPACK/src/f16-gemm/gen-inc/
D8x16inc-minmax-neonfp16arith-ld64.c118 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local
129 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
130 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc0, va1, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
131 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
132 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
133 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
134 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
135 vacc6x89ABCDEF = vfmaq_lane_f16(vacc6x89ABCDEF, vb89ABCDEFc0, va6, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
136 vacc7x89ABCDEF = vfmaq_lane_f16(vacc7x89ABCDEF, vb89ABCDEFc0, va7, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
155 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
D6x16inc-minmax-neonfp16arith-ld64.c100 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
109 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
110 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc0, va1, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
111 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
112 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
113 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
114 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
129 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
130 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
131 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D4x16inc-minmax-neonfp16arith-ld64.c82 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local
89 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
90 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc0, va1, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
91 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
92 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
103 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
104 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
105 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
106 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
D1x16inc-minmax-neonfp16arith-ld64.c55 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() local
59 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
64 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
/external/XNNPACK/src/f16-igemm/gen/
D8x16-minmax-neonfp16arith-ld64.c148 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
159 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
160 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc0, va1, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
161 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
162 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
163 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
164 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc0, va5, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
165 vacc6x89ABCDEF = vfmaq_lane_f16(vacc6x89ABCDEF, vb89ABCDEFc0, va6, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
166 vacc7x89ABCDEF = vfmaq_lane_f16(vacc7x89ABCDEF, vb89ABCDEFc0, va7, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
185 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c124 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
133 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
134 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc0, va1, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
135 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
136 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
137 vacc4x89ABCDEF = vfmaq_lane_f16(vacc4x89ABCDEF, vb89ABCDEFc0, va4, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
138 vacc5x89ABCDEF = vfmaq_lane_f16(vacc5x89ABCDEF, vb89ABCDEFc0, va5, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
153 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
154 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
155 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c100 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
107 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
108 vacc1x89ABCDEF = vfmaq_lane_f16(vacc1x89ABCDEF, vb89ABCDEFc0, va1, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
109 vacc2x89ABCDEF = vfmaq_lane_f16(vacc2x89ABCDEF, vb89ABCDEFc0, va2, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
110 vacc3x89ABCDEF = vfmaq_lane_f16(vacc3x89ABCDEF, vb89ABCDEFc0, va3, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
121 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
122 vacc1x89ABCDEF = vfmaq_f16(vacc1x89ABCDEF, va1c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
123 vacc2x89ABCDEF = vfmaq_f16(vacc2x89ABCDEF, va2c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
124 vacc3x89ABCDEF = vfmaq_f16(vacc3x89ABCDEF, va3c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
D1x16-minmax-neonfp16arith-ld64.c64 …const float16x8_t vb89ABCDEFc0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() local
68 vacc0x89ABCDEF = vfmaq_lane_f16(vacc0x89ABCDEF, vb89ABCDEFc0, va0, 0); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
73 vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
/external/XNNPACK/src/amalgam/
Dfma3.c2302 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local
2305 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
2342 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local
2345 … = _mm256_fmadd_ps(_mm256_and_ps(va0, _mm256_cmp_ps(vb89ABCDEFc0, vzero, _CMP_NEQ_OQ)), vb89ABCDEF… in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
2489 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
2495 vacc0x89ABCDEF = _mm256_fmadd_ps(va0, vb89ABCDEFc0, vacc0x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
2496 vacc1x89ABCDEF = _mm256_fmadd_ps(va1, vb89ABCDEFc0, vacc1x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
2497 vacc2x89ABCDEF = _mm256_fmadd_ps(va2, vb89ABCDEFc0, vacc2x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
2498 vacc3x89ABCDEF = _mm256_fmadd_ps(va3, vb89ABCDEFc0, vacc3x89ABCDEF); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
2568 const __m256 vb89ABCDEFc0 = _mm256_load_ps(w + 8); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
[all …]

12345