Home
last modified time | relevance | path

Searched refs:vget_low_f32 (Results 1 – 25 of 360) sorted by relevance

12345678910>>...15

/external/XNNPACK/src/f32-igemm/gen/
D6x8-neonfma-lane-ld128.c127 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
128 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
129 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
130 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
131 vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c0, vget_low_f32(va4), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
132 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
133 vacc0x4567 = vfmaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
134 vacc1x4567 = vfmaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
135 vacc2x4567 = vfmaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
136 vacc3x4567 = vfmaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
[all …]
D6x8-neon-lane-ld128.c127 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
128 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
129 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
130 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
131 vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c0, vget_low_f32(va4), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
132 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
133 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
134 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
135 vacc2x4567 = vmlaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
136 vacc3x4567 = vmlaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
[all …]
D4x8-neon-lane-ld128.c103 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
104 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
105 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
106 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
107 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
108 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
109 vacc2x4567 = vmlaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
110 vacc3x4567 = vmlaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
115 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, vget_low_f32(va0), 1); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
116 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, vget_low_f32(va1), 1); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld128()
[all …]
D4x8-neonfma-lane-ld128.c103 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
104 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
105 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
106 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
107 vacc0x4567 = vfmaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
108 vacc1x4567 = vfmaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
109 vacc2x4567 = vfmaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
110 vacc3x4567 = vfmaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
115 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, vget_low_f32(va0), 1); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
116 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, vget_low_f32(va1), 1); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld128()
[all …]
D6x8-neonfma-dup-ld128.c127 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
128 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
129 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
130 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
131 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
132 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
149 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
150 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
151 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
152 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
[all …]
D6x8-neon-dup-ld128.c127 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
128 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
129 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
130 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
131 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
132 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
149 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
150 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
151 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
152 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
[all …]
/external/XNNPACK/src/f32-gemm/gen-inc/
D6x8-neonfma-lane-ld128.c101 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
102 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
103 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
104 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
105 vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c0, vget_low_f32(va4), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
106 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
107 vacc0x4567 = vfmaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
108 vacc1x4567 = vfmaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
109 vacc2x4567 = vfmaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
110 vacc3x4567 = vfmaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
[all …]
D6x8-neon-lane-ld128.c101 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
102 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
103 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
104 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
105 vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c0, vget_low_f32(va4), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
106 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
107 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
108 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
109 vacc2x4567 = vmlaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
110 vacc3x4567 = vmlaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
[all …]
D4x8-neonfma-lane-ld128.c83 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
84 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
85 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
86 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
87 vacc0x4567 = vfmaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
88 vacc1x4567 = vfmaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
89 vacc2x4567 = vfmaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
90 vacc3x4567 = vfmaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
95 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, vget_low_f32(va0), 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
96 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, vget_low_f32(va1), 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld128()
[all …]
D4x8-neon-lane-ld128.c83 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
84 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
85 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
86 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
87 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
88 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
89 vacc2x4567 = vmlaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
90 vacc3x4567 = vmlaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
95 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, vget_low_f32(va0), 1); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
96 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, vget_low_f32(va1), 1); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld128()
[all …]
D6x8-neon-dup-ld128.c101 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
102 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
103 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
104 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
105 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
106 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
123 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
124 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
125 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
126 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
[all …]
D6x8-neonfma-dup-ld128.c101 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
102 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
103 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
104 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
105 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
106 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
123 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
124 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
125 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
126 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
D6x8-neonfma-lane-ld128.c99 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
100 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
101 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
102 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
103 vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c0, vget_low_f32(va4), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
104 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
105 vacc0x4567 = vfmaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
106 vacc1x4567 = vfmaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
107 vacc2x4567 = vfmaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
108 vacc3x4567 = vfmaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128()
[all …]
D6x8-neon-lane-ld128.c99 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
100 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
101 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
102 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
103 vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c0, vget_low_f32(va4), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
104 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
105 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
106 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
107 vacc2x4567 = vmlaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
108 vacc3x4567 = vmlaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld128()
[all …]
D4x8-neon-lane-ld128.c81 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
82 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
83 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
84 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
85 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
86 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
87 vacc2x4567 = vmlaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
88 vacc3x4567 = vmlaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
93 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, vget_low_f32(va0), 1); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
94 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, vget_low_f32(va1), 1); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
[all …]
D4x8-neonfma-lane-ld128.c81 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
82 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
83 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
84 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
85 vacc0x4567 = vfmaq_lane_f32(vacc0x4567, vb4567c0, vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
86 vacc1x4567 = vfmaq_lane_f32(vacc1x4567, vb4567c0, vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
87 vacc2x4567 = vfmaq_lane_f32(vacc2x4567, vb4567c0, vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
88 vacc3x4567 = vfmaq_lane_f32(vacc3x4567, vb4567c0, vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
93 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, vget_low_f32(va0), 1); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
94 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, vget_low_f32(va1), 1); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
[all …]
D6x8-neonfma-dup-ld128.c99 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
100 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
101 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
102 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
103 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
104 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
121 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
122 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
123 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
124 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
[all …]
D6x8-neon-dup-ld128.c99 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
100 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
101 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
102 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
103 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
104 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
121 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
122 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
123 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
124 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
[all …]
/external/XNNPACK/src/f32-spmm/gen/
D4x4-neonfma.c127 float32x2_t vout01c0 = vmin_f32(vacc01c0, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x4__neonfma()
128 float32x2_t vout01c1 = vmin_f32(vacc01c1, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x4__neonfma()
129 float32x2_t vout01c2 = vmin_f32(vacc01c2, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x4__neonfma()
130 float32x2_t vout01c3 = vmin_f32(vacc01c3, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x4__neonfma()
132 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma()
133 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma()
134 vout01c2 = vmax_f32(vout01c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma()
135 vout01c3 = vmax_f32(vout01c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma()
159 float32x2_t vout01 = vmin_f32(vacc01, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x4__neonfma()
160 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma()
[all …]
D8x4-neonfma.c223 float32x2_t vout01c0 = vmin_f32(vacc01c0, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_8x4__neonfma()
224 float32x2_t vout01c1 = vmin_f32(vacc01c1, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_8x4__neonfma()
225 float32x2_t vout01c2 = vmin_f32(vacc01c2, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_8x4__neonfma()
226 float32x2_t vout01c3 = vmin_f32(vacc01c3, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_8x4__neonfma()
228 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma()
229 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma()
230 vout01c2 = vmax_f32(vout01c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma()
231 vout01c3 = vmax_f32(vout01c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma()
255 float32x2_t vout01 = vmin_f32(vacc01, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_8x4__neonfma()
256 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma()
[all …]
D4x2-neonfma.c113 float32x2_t vout01c0 = vmin_f32(vacc01c0, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x2__neonfma()
114 float32x2_t vout01c1 = vmin_f32(vacc01c1, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x2__neonfma()
116 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma()
117 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma()
139 float32x2_t vout01 = vmin_f32(vacc01, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x2__neonfma()
140 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma()
171 float32x2_t vout0c0 = vmin_f32(vacc0c0, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x2__neonfma()
172 float32x2_t vout0c1 = vmin_f32(vacc0c1, vget_low_f32(vmax)); in xnn_f32_spmm_ukernel_4x2__neonfma()
174 vout0c0 = vmax_f32(vout0c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma()
175 vout0c1 = vmax_f32(vout0c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma()
[all …]
/external/XNNPACK/src/f32-ppmm/gen/
D8x8-neon.c91 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123, vget_low_f32(va0123), 0); in xnn_f32_ppmm_ukernel_8x8__neon()
92 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123, vget_low_f32(va0123), 1); in xnn_f32_ppmm_ukernel_8x8__neon()
95 vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123, vget_low_f32(va4567), 0); in xnn_f32_ppmm_ukernel_8x8__neon()
96 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123, vget_low_f32(va4567), 1); in xnn_f32_ppmm_ukernel_8x8__neon()
99 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567, vget_low_f32(va0123), 0); in xnn_f32_ppmm_ukernel_8x8__neon()
100 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567, vget_low_f32(va0123), 1); in xnn_f32_ppmm_ukernel_8x8__neon()
103 vacc4x4567 = vmlaq_lane_f32(vacc4x4567, vb4567, vget_low_f32(va4567), 0); in xnn_f32_ppmm_ukernel_8x8__neon()
104 vacc5x4567 = vmlaq_lane_f32(vacc5x4567, vb4567, vget_low_f32(va4567), 1); in xnn_f32_ppmm_ukernel_8x8__neon()
196 float32x2_t vacc7x01 = vget_low_f32(vacc7x0123); in xnn_f32_ppmm_ukernel_8x8__neon()
197 float32x2_t vacc6x01 = vget_low_f32(vacc6x0123); in xnn_f32_ppmm_ukernel_8x8__neon()
[all …]
D4x8-neon.c66 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123, vget_low_f32(va0123), 0); in xnn_f32_ppmm_ukernel_4x8__neon()
67 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123, vget_low_f32(va0123), 1); in xnn_f32_ppmm_ukernel_4x8__neon()
70 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567, vget_low_f32(va0123), 0); in xnn_f32_ppmm_ukernel_4x8__neon()
71 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567, vget_low_f32(va0123), 1); in xnn_f32_ppmm_ukernel_4x8__neon()
127 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_ppmm_ukernel_4x8__neon()
128 float32x2_t vacc2x01 = vget_low_f32(vacc2x0123); in xnn_f32_ppmm_ukernel_4x8__neon()
129 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_ppmm_ukernel_4x8__neon()
130 float32x2_t vacc0x01 = vget_low_f32(vacc0x0123); in xnn_f32_ppmm_ukernel_4x8__neon()
/external/XNNPACK/src/f32-gavgpool-spchw/
Dneon-x4.c78 const float32x4_t vsum01 = vcombine_f32(vadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)), in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
79 vadd_f32(vget_low_f32(vsum1), vget_high_f32(vsum1))); in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
80 const float32x4_t vsum23 = vcombine_f32(vadd_f32(vget_low_f32(vsum2), vget_high_f32(vsum2)), in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
81 vadd_f32(vget_low_f32(vsum3), vget_high_f32(vsum3))); in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
82 const float32x4_t vsum = vcombine_f32(vpadd_f32(vget_low_f32(vsum01), vget_high_f32(vsum01)), in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
83 vpadd_f32(vget_low_f32(vsum23), vget_high_f32(vsum23))); in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
114 float32x2_t vsum = vadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)); in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
117 float32x2_t vout = vmul_f32(vsum, vget_low_f32(vmultiplier)); in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
119 vout = vmax_f32(vout, vget_low_f32(voutput_min)); in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
120 vout = vmin_f32(vout, vget_low_f32(voutput_max)); in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
/external/webrtc/webrtc/modules/audio_processing/aec/
Daec_rdft_neon.c34 float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v)); in cft1st_128_neon()
36 float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v)); in cft1st_128_neon()
63 a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v)); in cft1st_128_neon()
64 a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v)); in cft1st_128_neon()
115 vst1_f32(&a[j + 0], vget_low_f32(xx0)); in cftmdl_128_neon()
117 vst1_f32(&a[j + 16], vget_low_f32(xx1)); in cftmdl_128_neon()
122 vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add)); in cftmdl_128_neon()
123 vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub)); in cftmdl_128_neon()
124 vst1_f32(&a[j + 40], vget_low_f32(yy4)); in cftmdl_128_neon()
169 vst1_f32(&a[j + 0], vget_low_f32(xx)); in cftmdl_128_neon()
[all …]

12345678910>>...15