Home
last modified time | relevance | path

Searched refs: vacc0x01234567 (Results 1 – 25 of 272) sorted by relevance

Pages: 1 2 3 4 5 6 7 8 9 10 11

/external/XNNPACK/src/f16-gemm/gen/
D1x8-minmax-neonfp16arith-ld64.c45 …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
54 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
58 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
63 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
67 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
72 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
76 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
81 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
85 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
96 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
[all …]
D1x16-minmax-neonfp16arith-ld64.c45 …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() local
56 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
61 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
68 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
73 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
80 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
85 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
92 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
97 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
110 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567); in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
[all …]
D4x8-minmax-neonfp16arith-ld64.c63 …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
64 float16x8_t vacc1x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
65 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
66 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
78 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
88 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
96 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
106 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
114 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
124 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-gemm/gen-inc/
D1x8inc-minmax-neonfp16arith-ld64.c47 …float16x8_t vacc0x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64() local
56 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
60 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
65 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
69 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
74 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
78 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
83 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
87 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
98 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
[all …]
D1x16inc-minmax-neonfp16arith-ld64.c47 …float16x8_t vacc0x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() local
58 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
63 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
70 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
75 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
82 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
87 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
94 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
99 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
112 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567); in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
[all …]
D4x8inc-minmax-neonfp16arith-ld64.c65 …float16x8_t vacc0x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64() local
80 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
90 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
98 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
108 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
116 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
126 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
134 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
144 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
161 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-igemm/gen/
D1x8-minmax-neonfp16arith-ld64.c47 …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
65 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
69 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
74 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
78 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
83 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
87 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
92 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
96 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
105 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
[all …]
D1x16-minmax-neonfp16arith-ld64.c47 …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() local
67 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
72 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
79 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
84 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
91 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
96 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
103 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
108 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
119 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0, vb01234567); in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
[all …]
D4x8-minmax-neonfp16arith-ld64.c59 …float16x8_t vacc0x01234567 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8_t)); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
60 float16x8_t vacc1x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
61 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
62 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
98 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
108 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
116 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c1, va0, 1); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
126 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
134 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c2, va0, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
144 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
D1x16s4-minmax-fma3-broadcast.c42 __m256 vacc0x01234567 = _mm256_load_ps(w + 0); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local
55 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
63 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c1, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
71 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
79 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
95 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
103 vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
107 vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
111 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
120 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast()
[all …]
D1x16-minmax-fma3-broadcast.c42 __m256 vacc0x01234567 = _mm256_load_ps(w + 0); in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast() local
55 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast()
62 vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast()
66 vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast()
70 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast()
79 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast()
81 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast()
85 __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast()
89 vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1); in xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast()
D1x16-minmax-avx-broadcast.c42 __m256 vacc0x01234567 = _mm256_load_ps(w + 0); in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast() local
55 vacc0x01234567 = _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567)); in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast()
62 vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast()
66 vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast()
70 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast()
79 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast()
81 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast()
85 __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast()
89 vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1); in xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast()
/external/XNNPACK/src/f32-gemm/gen-inc/
D1x16s4inc-minmax-fma3-broadcast.c44 __m256 vacc0x01234567 = _mm256_load_ps(acc + 0); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast() local
57 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
65 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c1, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
73 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
81 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
97 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
105 vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
109 vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
113 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
122 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast()
[all …]
D1x16inc-minmax-fma3-broadcast.c44 __m256 vacc0x01234567 = _mm256_load_ps(acc + 0); in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast() local
57 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast()
64 vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast()
68 vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast()
72 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast()
81 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast()
83 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast()
87 __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast()
91 vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast()
D1x16inc-minmax-avx-broadcast.c44 __m256 vacc0x01234567 = _mm256_load_ps(acc + 0); in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast() local
57 vacc0x01234567 = _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast()
64 vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast()
68 vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast()
72 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast()
81 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast()
83 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast()
87 __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast()
91 vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast()
/external/XNNPACK/src/f16-vmulcaddc/gen/
Dc16-minmax-neonfp16arith-2x.c53 float16x8_t vacc0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x() local
61 vacc0x01234567 = vfmaq_f16(vbias01234567, vscale01234567, vacc0x01234567); in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x()
66 vacc0x01234567 = vmaxq_f16(vacc0x01234567, vmin); in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x()
71 vacc0x01234567 = vminq_f16(vacc0x01234567, vmax); in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x()
76 vst1q_f16(o0, vacc0x01234567); o0 += 8; in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x()
84 float16x8_t vacc0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x() local
89 vacc0x01234567 = vfmaq_f16(vbias01234567, vscale01234567, vacc0x01234567); in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x()
92 vacc0x01234567 = vmaxq_f16(vacc0x01234567, vmin); in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x()
95 vacc0x01234567 = vminq_f16(vacc0x01234567, vmax); in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x()
98 vst1q_f16(o0, vacc0x01234567); o0 += 8; in xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x()
[all …]
Dc8-minmax-neonfp16arith-2x.c52 float16x8_t vacc0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x() local
57 vacc0x01234567 = vfmaq_f16(vbias01234567, vscale01234567, vacc0x01234567); in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x()
60 vacc0x01234567 = vmaxq_f16(vacc0x01234567, vmin); in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x()
63 vacc0x01234567 = vminq_f16(vacc0x01234567, vmax); in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x()
66 vst1q_f16(o0, vacc0x01234567); o0 += 8; in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x()
72 float16x8_t vacc0x01234567 = vld1q_f16(i0); i0 = (const __fp16*) ((uintptr_t) i0 + c); in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x() local
77 vacc0x01234567 = vfmaq_f16(vbias01234567, vscale01234567, vacc0x01234567); in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x()
80 vacc0x01234567 = vmaxq_f16(vacc0x01234567, vmin); in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x()
83 vacc0x01234567 = vminq_f16(vacc0x01234567, vmax); in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x()
86 float16x4_t vacc0x0123 = vget_low_f16(vacc0x01234567); in xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x()
[all …]
/external/XNNPACK/src/f32-igemm/gen/
D1x16s4-minmax-fma3-broadcast.c46 __m256 vacc0x01234567 = _mm256_load_ps(w); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast() local
68 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
76 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c1, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
84 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c2, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
92 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
108 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
118 vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
122 vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
126 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
134 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast()
[all …]
D1x16-minmax-avx-broadcast.c46 __m256 vacc0x01234567 = _mm256_load_ps(w); in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast() local
68 vacc0x01234567 = _mm256_add_ps(vacc0x01234567, _mm256_mul_ps(va0, vb01234567)); in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast()
76 vacc0x01234567 = _mm256_min_ps(vacc0x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast()
80 vacc0x01234567 = _mm256_max_ps(vacc0x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast()
84 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast()
92 _mm256_storeu_ps(c0, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast()
94 vacc0x01234567 = vacc0x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast()
98 __m128 vacc0x0123 = _mm256_castps256_ps128(vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast()
102 vacc0x0123 = _mm256_extractf128_ps(vacc0x01234567, 1); in xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast()
/external/XNNPACK/src/qs8-gavgpool/gen/
D7p7x-minmax-sse41-c8-acc2.c60 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local
63 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
65 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
68 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
70 const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
71 … = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, _mm_cmpgt_epi16(_mm_setzero_si128(), va… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
106 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local
109 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
111 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
114 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
[all …]
D7p7x-minmax-wasmsimd-c8-acc2.c59 v128_t vacc0x01234567 = wasm_i16x8_add(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local
62 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
64 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
67 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
69 const v128_t vacc0123 = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
70 const v128_t vacc4567 = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
104 v128_t vacc0x01234567 = wasm_i16x8_add(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local
107 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
109 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
112 vacc0x01234567 = wasm_i16x8_add(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
[all …]
D7p7x-minmax-neon-c8-acc2.c52 int16x8_t vacc0x01234567 = vaddl_s8(vi0x01234567, vi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local
55 vacc0x01234567 = vaddw_s8(vacc0x01234567, vi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
57 vacc0x01234567 = vaddw_s8(vacc0x01234567, vi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
60 vacc0x01234567 = vaddq_s16(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
62 const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
63 const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
89 int16x8_t vacc0x01234567 = vaddl_s8(vi0x01234567, vi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local
92 vacc0x01234567 = vaddw_s8(vacc0x01234567, vi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
94 vacc0x01234567 = vaddw_s8(vacc0x01234567, vi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
97 vacc0x01234567 = vaddq_s16(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
[all …]
D7p7x-minmax-sse2-c8-acc2.c67 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local
70 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
72 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
75 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
77 const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
78 …const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
79 …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
121 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local
124 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
126 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
[all …]
D7p7x-minmax-ssse3-c8-acc2.c67 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local
70 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
72 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
75 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
77 const __m128i vsgnacc0x01234567 = _mm_cmpgt_epi16(_mm_setzero_si128(), vacc0x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
78 …const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vacc0x01234567, vsgnacc0x01234567… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
79 …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, vsgnacc0x01234567… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
121 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local
124 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
126 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
[all …]
D7p7x-minmax-sse41-c24-acc2.c74 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local
81 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
87 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
92 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
96 const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
97 … = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, _mm_cmpgt_epi16(_mm_setzero_si128(), va… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
129 __m128i vacc0x01234567 = _mm_add_epi16(vxi0x01234567, vxi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local
132 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi4x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
134 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vxi6x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
137 vacc0x01234567 = _mm_add_epi16(vacc0x01234567, vacc1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
[all …]

Pages: 1 2 3 4 5 6 7 8 9 10 11