Home
last modified time | relevance | path

Searched refs:vi2x0 (Results 1 – 25 of 82) sorted by relevance

1234

/external/XNNPACK/src/f32-conv-hwc/gen/
D3x3s2p0p1c3x8-neonfma-2x1.c87 float32x4_t vi2x0 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local
102 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
104 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk00c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
117 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk20c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
119 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
126 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
128 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk00c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
141 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk20c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
143 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
150 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
[all …]
D3x3s2p0p1c3x8-neon-2x1.c85 float32x4_t vi2x0 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local
100 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
102 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk00c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
115 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk20c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
117 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
124 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
126 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk00c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
139 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk20c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
141 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
148 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
[all …]
D3x3s2p0p1c3x8-neonfma-2x2.c87 float32x4_t vi2x0 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local
113 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
115 vo1x0c4567 = vfmaq_lane_f32(vo1x0c4567, vk00c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
138 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk20c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
140 vo0x0c4567 = vfmaq_lane_f32(vo0x0c4567, vk20c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
152 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
154 vo1x0c4567 = vfmaq_lane_f32(vo1x0c4567, vk00c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
177 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk20c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
179 vo0x0c4567 = vfmaq_lane_f32(vo0x0c4567, vk20c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
198 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
[all …]
D3x3s2p0p1c3x8-neon-2x2.c85 float32x4_t vi2x0 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local
111 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
113 vo1x0c4567 = vmlaq_lane_f32(vo1x0c4567, vk00c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
136 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk20c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
138 vo0x0c4567 = vmlaq_lane_f32(vo0x0c4567, vk20c0x4567, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
150 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
152 vo1x0c4567 = vmlaq_lane_f32(vo1x0c4567, vk00c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
175 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk20c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
177 vo0x0c4567 = vmlaq_lane_f32(vo0x0c4567, vk20c1x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
196 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
[all …]
D3x3s2p0p1c3x4-neon-2x1.c85 float32x4_t vi2x0 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local
97 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
106 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk20c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
112 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
121 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk20c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
127 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
136 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk20c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
142 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk01c0x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
151 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk21c0x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
245 vi2x0 = vcombine_f32(vget_high_f32(vi2x1), vi2x2); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
[all …]
D3x3s2p0p1c3x4-neonfma-2x1.c87 float32x4_t vi2x0 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local
99 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
108 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk20c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
114 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
123 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk20c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
129 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
138 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk20c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
144 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk01c0x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
153 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk21c0x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
247 vi2x0 = vcombine_f32(vget_high_f32(vi2x1), vi2x2); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
[all …]
D3x3s2p1c3x8-neon-2x1.c85 float32x4_t vi2x0 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local
100 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
102 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk00c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
115 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk20c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
117 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
124 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
126 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk00c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
139 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk20c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
141 vo0c4567 = vmlaq_lane_f32(vo0c4567, vk20c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
148 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
[all …]
D3x3s2p1c3x8-neonfma-2x1.c87 float32x4_t vi2x0 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local
102 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
104 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk00c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
117 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk20c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
119 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
126 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
128 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk00c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
141 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk20c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
143 vo0c4567 = vfmaq_lane_f32(vo0c4567, vk20c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
150 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
[all …]
D3x3s2p0p1c3x4-neon-2x2.c85 float32x4_t vi2x0 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local
106 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
121 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk20c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
130 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
145 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk20c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
161 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
176 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk20c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
185 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk01c0x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
200 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk21c0x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
335 vi2x0 = vi2x3; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
[all …]
D3x3s2p0p1c3x4-neonfma-2x2.c87 float32x4_t vi2x0 = vld1q_f32(i2); i2 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local
108 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk00c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
123 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk20c0x0123, vget_low_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
132 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk00c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
147 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk20c1x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
163 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk00c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
178 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk20c2x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
187 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk01c0x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
202 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk21c0x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
337 vi2x0 = vi2x3; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
[all …]
D3x3s2p1c3x8-neonfma-2x2.c87 float32x4_t vi2x0 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local
113 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
115 vo1x0c4567 = vfmaq_lane_f32(vo1x0c4567, vk00c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
138 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk20c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
140 vo0x0c4567 = vfmaq_lane_f32(vo0x0c4567, vk20c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
159 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
161 vo1x0c4567 = vfmaq_lane_f32(vo1x0c4567, vk00c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
184 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk20c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
186 vo0x0c4567 = vfmaq_lane_f32(vo0x0c4567, vk20c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
198 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
[all …]
D3x3s2p1c3x8-neon-2x2.c85 float32x4_t vi2x0 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local
111 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk00c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
113 vo1x0c4567 = vmlaq_lane_f32(vo1x0c4567, vk00c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
136 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk20c0x0123, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
138 vo0x0c4567 = vmlaq_lane_f32(vo0x0c4567, vk20c0x4567, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
157 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk00c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
159 vo1x0c4567 = vmlaq_lane_f32(vo1x0c4567, vk00c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
182 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk20c1x0123, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
184 vo0x0c4567 = vmlaq_lane_f32(vo0x0c4567, vk20c1x4567, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
196 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk00c2x0123, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
[all …]
/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-scalar-3x1.c71 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
91 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
93 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
95 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
101 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
149 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
151 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
153 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
D3x3p1-minmax-scalar-2x1-acc2.c65 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local
83 float vo1p1 = vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
84 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
89 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
129 float vo1p1 = vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
130 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
D3x3p1-minmax-scalar-2x1.c65 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local
83 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
84 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
89 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
127 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
128 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
D3x3p1-minmax-scalar-4x1.c77 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
100 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
103 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
106 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
113 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
172 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
175 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
178 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
D3x3s2p1-minmax-scalar-2x1-acc2.c76 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() local
89 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
92 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
115 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
148 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
151 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
D3x3s2p1-minmax-scalar-2x1.c76 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() local
89 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
92 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
115 vi2x0 = vi2x2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
146 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
149 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
D3x3p1-minmax-scalar-5x1.c83 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
109 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
113 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
117 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
125 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
195 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
199 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
203 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
D5x5p2-minmax-scalar-3x1.c92 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
135 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
137 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
139 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
151 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
262 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
264 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
266 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
278 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
367 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
[all …]
D5x5p2-minmax-scalar-2x1.c86 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
125 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
126 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
135 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
220 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
221 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
230 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
299 vo1p0 += vi2x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
300 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D3x3p1-minmax-scalar-1x1-acc2.c59 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() local
73 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
77 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
104 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
D3x3p1-minmax-scalar-1x1.c59 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() local
73 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
77 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
103 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
D3x3p1-minmax-scalar-1x1-acc3.c59 float vi2x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local
73 float vo0p2 = vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
77 vi2x0 = vi2x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
105 float vo0p2 = vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-wasmsimd-2x2.c99 v128_t vi2x0 = vzero; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
120 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk00c0, wasm_v32x4_shuffle(vi2x0, vi2x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
133 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk20c0, wasm_v32x4_shuffle(vi2x0, vi2x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
148 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk00c1, wasm_v32x4_shuffle(vi2x0, vi2x0, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
161 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk20c1, wasm_v32x4_shuffle(vi2x0, vi2x0, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
169 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk00c2, wasm_v32x4_shuffle(vi2x0, vi2x0, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
182 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk20c2, wasm_v32x4_shuffle(vi2x0, vi2x0, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
322 vi2x0 = vi2x3; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
368 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk00c0, wasm_v32x4_shuffle(vi2x0, vi2x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
385 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk20c0, wasm_v32x4_shuffle(vi2x0, vi2x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
[all …]

1234