Home
last modified time | relevance | path

Searched refs:vdupq_lane_f32 (Results 1 – 25 of 42) sorted by relevance

12

/external/XNNPACK/src/f32-gemm/gen-inc/
D6x8-neon-dup-ld128.c101 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
102 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
103 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
104 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
105 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
106 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
123 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
124 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
125 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
126 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
[all …]
D6x8-neonfma-dup-ld128.c101 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
102 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
103 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
104 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
105 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
106 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
123 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
124 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
125 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
126 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128()
[all …]
D4x8-neonfma-dup-ld128.c83 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
84 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
85 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
86 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
99 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
100 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
101 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
102 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
115 const float32x4_t va0c2 = vdupq_lane_f32(vget_high_f32(va0), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
116 const float32x4_t va1c2 = vdupq_lane_f32(vget_high_f32(va1), 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld128()
[all …]
D4x8-neon-dup-ld128.c83 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
84 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
85 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
86 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
99 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
100 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
101 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
102 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
115 const float32x4_t va0c2 = vdupq_lane_f32(vget_high_f32(va0), 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
116 const float32x4_t va1c2 = vdupq_lane_f32(vget_high_f32(va1), 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld128()
[all …]
D6x8-neon-dup-ld64.c100 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
101 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
102 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
103 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
104 const float32x4_t va4c0 = vdupq_lane_f32(va4, 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
105 const float32x4_t va5c0 = vdupq_lane_f32(va5, 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
121 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
122 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
123 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
124 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
[all …]
D6x8-neonfma-dup-ld64.c100 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
101 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
102 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
103 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
104 const float32x4_t va4c0 = vdupq_lane_f32(va4, 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
105 const float32x4_t va5c0 = vdupq_lane_f32(va5, 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
121 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
122 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
123 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
124 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
[all …]
D4x8-neon-dup-ld64.c82 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
83 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
84 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
85 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
97 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
98 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
99 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
100 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
D4x8-neonfma-dup-ld64.c82 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
83 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
84 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
85 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
97 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
98 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
99 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
100 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
/external/XNNPACK/src/f32-igemm/gen/
D6x8-neonfma-dup-ld128.c127 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
128 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
129 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
130 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
131 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
132 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
149 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
150 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
151 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
152 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld128()
[all …]
D6x8-neon-dup-ld128.c127 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
128 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
129 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
130 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
131 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
132 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
149 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
150 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
151 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
152 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
[all …]
D4x8-neonfma-dup-ld128.c103 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
104 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
105 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
106 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
119 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
120 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
121 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
122 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
135 const float32x4_t va0c2 = vdupq_lane_f32(vget_high_f32(va0), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
136 const float32x4_t va1c2 = vdupq_lane_f32(vget_high_f32(va1), 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128()
[all …]
D4x8-neon-dup-ld128.c103 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
104 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
105 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
106 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
119 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
120 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
121 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
122 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
135 const float32x4_t va0c2 = vdupq_lane_f32(vget_high_f32(va0), 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
136 const float32x4_t va1c2 = vdupq_lane_f32(vget_high_f32(va1), 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld128()
[all …]
D6x8-neonfma-dup-ld64.c126 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
127 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
128 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
129 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
130 const float32x4_t va4c0 = vdupq_lane_f32(va4, 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
131 const float32x4_t va5c0 = vdupq_lane_f32(va5, 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
147 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
148 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
149 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
150 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
[all …]
D6x8-neon-dup-ld64.c126 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
127 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
128 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
129 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
130 const float32x4_t va4c0 = vdupq_lane_f32(va4, 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
131 const float32x4_t va5c0 = vdupq_lane_f32(va5, 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
147 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
148 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
149 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
150 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
[all …]
D4x8-neon-dup-ld64.c102 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
103 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
104 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
105 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
117 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
118 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
119 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
120 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
D4x8-neonfma-dup-ld64.c102 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
103 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
104 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
105 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
117 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
118 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
119 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
120 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
/external/XNNPACK/src/f32-gemm/gen/
D6x8-neonfma-dup-ld128.c99 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
100 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
101 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
102 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
103 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
104 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
121 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
122 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
123 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
124 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128()
[all …]
D6x8-neon-dup-ld128.c99 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
100 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
101 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
102 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
103 const float32x4_t va4c0 = vdupq_lane_f32(vget_low_f32(va4), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
104 const float32x4_t va5c0 = vdupq_lane_f32(vget_low_f32(va5), 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
121 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
122 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
123 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
124 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
[all …]
D4x8-neonfma-dup-ld128.c81 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
82 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
83 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
84 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
97 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
98 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
99 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
100 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
113 const float32x4_t va0c2 = vdupq_lane_f32(vget_high_f32(va0), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
114 const float32x4_t va1c2 = vdupq_lane_f32(vget_high_f32(va1), 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128()
[all …]
D4x8-neon-dup-ld128.c81 const float32x4_t va0c0 = vdupq_lane_f32(vget_low_f32(va0), 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
82 const float32x4_t va1c0 = vdupq_lane_f32(vget_low_f32(va1), 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
83 const float32x4_t va2c0 = vdupq_lane_f32(vget_low_f32(va2), 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
84 const float32x4_t va3c0 = vdupq_lane_f32(vget_low_f32(va3), 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
97 const float32x4_t va0c1 = vdupq_lane_f32(vget_low_f32(va0), 1); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
98 const float32x4_t va1c1 = vdupq_lane_f32(vget_low_f32(va1), 1); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
99 const float32x4_t va2c1 = vdupq_lane_f32(vget_low_f32(va2), 1); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
100 const float32x4_t va3c1 = vdupq_lane_f32(vget_low_f32(va3), 1); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
113 const float32x4_t va0c2 = vdupq_lane_f32(vget_high_f32(va0), 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
114 const float32x4_t va1c2 = vdupq_lane_f32(vget_high_f32(va1), 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld128()
[all …]
D6x8-neonfma-dup-ld64.c98 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
99 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
100 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
101 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
102 const float32x4_t va4c0 = vdupq_lane_f32(va4, 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
103 const float32x4_t va5c0 = vdupq_lane_f32(va5, 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
119 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
120 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
121 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
122 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
[all …]
D6x8-neon-dup-ld64.c98 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
99 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
100 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
101 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
102 const float32x4_t va4c0 = vdupq_lane_f32(va4, 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
103 const float32x4_t va5c0 = vdupq_lane_f32(va5, 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
119 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
120 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
121 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
122 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
[all …]
D4x8-neon-dup-ld64.c80 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
81 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
82 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
83 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
95 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
96 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
97 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
98 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
D4x8-neonfma-dup-ld64.c80 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
81 const float32x4_t va1c0 = vdupq_lane_f32(va1, 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
82 const float32x4_t va2c0 = vdupq_lane_f32(va2, 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
83 const float32x4_t va3c0 = vdupq_lane_f32(va3, 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
95 const float32x4_t va0c1 = vdupq_lane_f32(va0, 1); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
96 const float32x4_t va1c1 = vdupq_lane_f32(va1, 1); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
97 const float32x4_t va2c1 = vdupq_lane_f32(va2, 1); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
98 const float32x4_t va3c1 = vdupq_lane_f32(va3, 1); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
/external/XNNPACK/src/f32-ppmm/gen/
D8x8-neonfma.c109 const float32x4_t va0000 = vdupq_lane_f32(vget_low_f32(va0123), 0); in xnn_f32_ppmm_ukernel_8x8__neonfma()
110 const float32x4_t va1111 = vdupq_lane_f32(vget_low_f32(va0123), 1); in xnn_f32_ppmm_ukernel_8x8__neonfma()
111 const float32x4_t va2222 = vdupq_lane_f32(vget_high_f32(va0123), 0); in xnn_f32_ppmm_ukernel_8x8__neonfma()
112 const float32x4_t va3333 = vdupq_lane_f32(vget_high_f32(va0123), 1); in xnn_f32_ppmm_ukernel_8x8__neonfma()
113 const float32x4_t va4444 = vdupq_lane_f32(vget_low_f32(va4567), 0); in xnn_f32_ppmm_ukernel_8x8__neonfma()
114 const float32x4_t va5555 = vdupq_lane_f32(vget_low_f32(va4567), 1); in xnn_f32_ppmm_ukernel_8x8__neonfma()
115 const float32x4_t va6666 = vdupq_lane_f32(vget_high_f32(va4567), 0); in xnn_f32_ppmm_ukernel_8x8__neonfma()
116 const float32x4_t va7777 = vdupq_lane_f32(vget_high_f32(va4567), 1); in xnn_f32_ppmm_ukernel_8x8__neonfma()

12