
Searched refs:vb01234567c0 (Results 1 – 25 of 70) sorted by relevance

/external/XNNPACK/src/f16-gemm/gen/
8x8-minmax-neonfp16arith-ld64.c
107 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
110 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
111 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
112 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
113 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
114 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
115 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
116 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
117 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
128 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x8-minmax-neonfp16arith-ld64.c
91 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
94 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
95 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
96 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
97 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
98 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
99 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
108 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
109 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
110 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
4x8-minmax-neonfp16arith-ld64.c
75 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
78 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
79 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
80 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
81 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
88 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
89 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
90 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
91 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
6x16-minmax-neonfp16arith-ld64.c
97 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
101 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
102 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
103 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
104 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
105 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
106 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
121 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
122 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
123 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
8x16-minmax-neonfp16arith-ld64.c
115 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
119 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
120 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
121 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
122 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
123 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
124 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
125 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
126 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
145 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
4x16-minmax-neonfp16arith-ld64.c
79 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
83 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
84 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
85 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
86 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
97 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
98 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
99 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
100 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
1x8-minmax-neonfp16arith-ld64.c
51 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
54 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
58 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
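Every f16 hit above has the same shape: the microkernel loads one packed weight row into vb01234567c0 with vld1q_f16, advances w by sizeof(float16x8_t), and multiply-accumulates that row into one accumulator per output row, using vfmaq_lane_f16 in the main loop and vfmaq_f16 in the remainder. The sketch below illustrates that pattern for a single output row. It is a hypothetical simplification, not XNNPACK's source: the function name and signature are invented, and it consumes one activation per iteration where the real ld64 kernels load four at a time and step through lanes 0–3.

#include <arm_neon.h>   /* float16x8_t, vfmaq_lane_f16; needs ARMv8.2-A FP16 */
#include <stddef.h>
#include <stdint.h>

/* Hypothetical 1x8 inner loop: 'a' is one row of activations, 'w' the packed
 * weights (bias row first, then one 8-wide weight row per k step), 'c' the
 * output row. Build with e.g. -march=armv8.2-a+fp16. */
void f16_gemm_1x8_sketch(size_t kc, const float16_t* a, const void* w, float16_t* c)
{
  float16x8_t vacc0x01234567 = vld1q_f16((const float16_t*) w);  /* packed bias */
  w = (const void*) ((uintptr_t) w + sizeof(float16x8_t));

  for (size_t k = 0; k < kc; k += 1) {
    const float16x4_t va0 = vld1_dup_f16(a); a += 1;  /* broadcast a[k] */

    /* The load the search hits show: one packed weight row per k step. */
    const float16x8_t vb01234567c0 = vld1q_f16((const float16_t*) w);
    w = (const void*) ((uintptr_t) w + sizeof(float16x8_t));

    /* vacc += vb * va0[lane 0]; the 4x/6x/8x kernels repeat this line once
     * per output row, which is the fan-out visible in the results above. */
    vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0);
  }
  vst1q_f16(c, vacc0x01234567);
}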
/external/XNNPACK/src/f16-igemm/gen/
8x8-minmax-neonfp16arith-ld64.c
139 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
142 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
143 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
144 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
145 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
146 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
147 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
148 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
149 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
160 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x8-minmax-neonfp16arith-ld64.c
117 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
120 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
121 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
122 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
123 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
124 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
125 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
134 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
135 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
136 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
4x8-minmax-neonfp16arith-ld64.c
95 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
98 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
99 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
100 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
101 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
108 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
109 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
110 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
111 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
8x16-minmax-neonfp16arith-ld64.c
147 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local
151 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
152 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
153 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
154 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
155 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
156 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
157 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
158 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
177 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
6x16-minmax-neonfp16arith-ld64.c
123 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
127 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
128 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
129 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
130 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
131 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
132 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
147 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
148 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
149 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
4x16-minmax-neonfp16arith-ld64.c
99 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
103 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
104 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
105 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
106 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
117 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
118 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
119 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
120 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
1x8-minmax-neonfp16arith-ld64.c
62 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof( float16x… in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
65 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
69 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
/external/XNNPACK/src/f16-gemm/gen-inc/
8x8inc-minmax-neonfp16arith-ld64.c
109 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() local
112 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
113 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
114 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
115 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
116 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
117 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
118 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
119 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
130 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
6x8inc-minmax-neonfp16arith-ld64.c
93 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64() local
96 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
97 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
98 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
99 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
100 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
101 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
110 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
111 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
112 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
4x8inc-minmax-neonfp16arith-ld64.c
77 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64() local
80 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
81 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
82 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
83 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
90 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
91 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
92 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
93 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
8x16inc-minmax-neonfp16arith-ld64.c
117 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local
121 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
122 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
123 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
124 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
125 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
126 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
127 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c0, va6, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
128 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
147 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64()
[all …]
6x16inc-minmax-neonfp16arith-ld64.c
99 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
103 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
104 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
105 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
106 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
107 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c0, va4, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
108 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c0, va5, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
123 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
124 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
125 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
4x16inc-minmax-neonfp16arith-ld64.c
81 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local
85 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
86 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c0, va1, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
87 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
88 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
99 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
100 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
101 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
102 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
1x8inc-minmax-neonfp16arith-ld64.c
53 …const float16x8_t vb01234567c0 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64() local
56 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c0, va0, 0); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
60 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
/external/XNNPACK/src/f32-gemm/gen-inc/
4x16s4inc-minmax-fma3-broadcast.c
84 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
87 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
88 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c0, vacc1x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
89 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
90 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c0, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-gemm/gen/
4x16s4-minmax-fma3-broadcast.c
82 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
85 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
86 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c0, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
87 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
88 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c0, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
5x16s4-minmax-fma3-broadcast.c
92 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast() local
95 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
96 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c0, vacc1x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
97 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
98 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c0, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
99 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c0, vacc4x01234567); in xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
5x16s4-minmax-fma3-broadcast.c
117 const __m256 vb01234567c0 = _mm256_load_ps(w + 0); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast() local
120 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
121 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c0, vacc1x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
122 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
123 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c0, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
124 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c0, vacc4x01234567); in xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast()
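The f32 FMA3 hits follow the analogous AVX pattern: vb01234567c0 is an aligned 8-float load from the packed weights, and _mm256_fmadd_ps accumulates it against a broadcast activation, one fmadd per output row. Below is a hypothetical single-row sketch of that pattern, not XNNPACK's source; the function name and signature are invented, and the lane rotation that gives the "s4" kernels their name is omitted.

#include <immintrin.h>  /* __m256, _mm256_fmadd_ps; build with -mavx2 -mfma */
#include <stddef.h>

/* Hypothetical 1x8 inner loop for the f32 "broadcast" flavour. 'w' must be
 * 32-byte aligned because _mm256_load_ps is an aligned load. */
void f32_gemm_1x8_fma3_sketch(size_t kc, const float* a, const float* w, float* c)
{
  __m256 vacc0x01234567 = _mm256_load_ps(w); w += 8;   /* packed bias */

  for (size_t k = 0; k < kc; k += 1) {
    const __m256 va0 = _mm256_broadcast_ss(a); a += 1; /* splat a[k] */
    const __m256 vb01234567c0 = _mm256_load_ps(w + 0); w += 8;

    /* vacc = va0 * vb + vacc; the 4x/5x kernels above repeat this line
     * once per output row. */
    vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c0, vacc0x01234567);
  }
  _mm256_storeu_ps(c, vacc0x01234567);
}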
