/external/XNNPACK/src/f32-igemm/gen/
D | 4x4-minmax-neon-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64():
    101  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    103  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    104  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    105  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    106  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);

D | 4x4-minmax-neonfma-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64():
    101  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    103  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    104  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    105  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    106  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);

D | 6x8-minmax-neonfma-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64():
    138  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    141  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    142  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    143  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    144  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    145  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);
    146  vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c1, va5, 1);

D | 6x8-minmax-neon-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64():
    138  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    141  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    142  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    143  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    144  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    145  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);
    146  vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c1, va5, 1);

D | 6x8-minmax-neonfma-dup-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64():
    144  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    153  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c1, vb0123c1);
    154  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c1, vb0123c1);
    155  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c1, vb0123c1);
    156  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c1, vb0123c1);
    157  vacc4x0123 = vfmaq_f32(vacc4x0123, va4c1, vb0123c1);
    158  vacc5x0123 = vfmaq_f32(vacc5x0123, va5c1, vb0123c1);

D | 6x8-minmax-neon-dup-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64():
    144  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    153  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c1, vb0123c1);
    154  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c1, vb0123c1);
    155  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c1, vb0123c1);
    156  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c1, vb0123c1);
    157  vacc4x0123 = vmlaq_f32(vacc4x0123, va4c1, vb0123c1);
    158  vacc5x0123 = vmlaq_f32(vacc5x0123, va5c1, vb0123c1);

D | 4x8-minmax-neon-dup-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64():
    114  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    121  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c1, vb0123c1);
    122  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c1, vb0123c1);
    123  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c1, vb0123c1);
    124  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c1, vb0123c1);

D | 4x8-minmax-neonfma-dup-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64():
    114  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    121  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c1, vb0123c1);
    122  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c1, vb0123c1);
    123  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c1, vb0123c1);
    124  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c1, vb0123c1);

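Every lane-ld64 hit above follows one pattern: the kernel loads two consecutive elements of A as a 64-bit pair (hence ld64), and vb0123c1 is the packed-B row consumed by the second of those two k-steps through a lane-indexed multiply-accumulate. Below is a minimal single-row sketch of that step, assuming an AArch64 NEON target; kstep_1x4_lane, a, w and acc are illustrative names of mine, not XNNPACK's.

    #include <arm_neon.h>

    /* One k-pair step for a single 1x4 output tile: a points at a[k..k+1],
       *w at two packed 4-float rows of B.  Sketch only, not the generated
       kernel, which replicates the FMA lines once per output row (va0..va5). */
    static inline float32x4_t kstep_1x4_lane(const float* a, const float** w,
                                             float32x4_t acc) {
      const float32x2_t va = vld1_f32(a);                   /* a[k], a[k+1]  */
      const float32x4_t vb0123c0 = vld1q_f32(*w); *w += 4;  /* B row for k   */
      const float32x4_t vb0123c1 = vld1q_f32(*w); *w += 4;  /* B row for k+1 */
      acc = vfmaq_lane_f32(acc, vb0123c0, va, 0);  /* acc += a[k]   * B(k)   */
      acc = vfmaq_lane_f32(acc, vb0123c1, va, 1);  /* acc += a[k+1] * B(k+1) */
      return acc;
    }

The 4x4, 5x8 and 6x8 kernels in the listing are this step unrolled across 4, 5 or 6 output rows, reusing the single vb0123c1 load for every row.
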
/external/XNNPACK/src/f32-gemm/gen/
D | 6x8-minmax-neonfma-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64():
    110  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    113  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    114  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    115  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    116  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    117  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);
    118  vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c1, va5, 1);

D | 6x8-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64():
    110  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    113  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    114  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    115  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    116  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    117  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);
    118  vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c1, va5, 1);

D | 5x8-minmax-neonfma-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64():
     99  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    102  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    103  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    104  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    105  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    106  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);

D | 5x8-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64():
     99  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    102  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    103  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    104  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    105  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    106  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);

D | 6x8-minmax-neon-dup-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64():
    116  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    125  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c1, vb0123c1);
    126  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c1, vb0123c1);
    127  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c1, vb0123c1);
    128  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c1, vb0123c1);
    129  vacc4x0123 = vmlaq_f32(vacc4x0123, va4c1, vb0123c1);
    130  vacc5x0123 = vmlaq_f32(vacc5x0123, va5c1, vb0123c1);

D | 6x8-minmax-neonfma-dup-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64():
    116  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    125  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c1, vb0123c1);
    126  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c1, vb0123c1);
    127  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c1, vb0123c1);
    128  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c1, vb0123c1);
    129  vacc4x0123 = vfmaq_f32(vacc4x0123, va4c1, vb0123c1);
    130  vacc5x0123 = vfmaq_f32(vacc5x0123, va5c1, vb0123c1);

D | 4x8-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64():
     88  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
     91  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
     92  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
     93  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
     94  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);

D | 4x8-minmax-neonfma-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64():
     88  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
     91  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
     92  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
     93  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
     94  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);

D | 4x8-minmax-neonfma-dup-ld64.c | in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64():
     92  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
     99  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c1, vb0123c1);
    100  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c1, vb0123c1);
    101  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c1, vb0123c1);
    102  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c1, vb0123c1);

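The dup-ld64 hits compute the same update but broadcast the A lane first (the va0c1..va5c1 temporaries above) and then use a plain vector multiply-accumulate; on non-FMA NEON the vfmaq_f32 becomes vmlaq_f32. A sketch of the same k-pair step in that style, with the same illustrative names as before:

    #include <arm_neon.h>

    /* "dup" flavour of the k-pair step: each A lane is broadcast with
       vdupq_lane_f32, then combined with a plain vector FMA. */
    static inline float32x4_t kstep_1x4_dup(const float* a, const float** w,
                                            float32x4_t acc) {
      const float32x2_t va = vld1_f32(a);
      const float32x4_t vb0123c0 = vld1q_f32(*w); *w += 4;
      const float32x4_t vb0123c1 = vld1q_f32(*w); *w += 4;
      const float32x4_t vac0 = vdupq_lane_f32(va, 0);  /* a[k]   in all lanes */
      const float32x4_t vac1 = vdupq_lane_f32(va, 1);  /* a[k+1] in all lanes */
      acc = vfmaq_f32(acc, vac0, vb0123c0);
      acc = vfmaq_f32(acc, vac1, vb0123c1);
      return acc;
    }

Both forms are arithmetically identical; the generator presumably emits both because lane-indexed and broadcast multiplies schedule differently across ARM cores.
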
/external/XNNPACK/src/f32-gemm/gen-inc/
D | 6x8inc-minmax-neon-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64():
    112  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    115  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    116  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    117  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    118  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    119  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);
    120  vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c1, va5, 1);

D | 6x8inc-minmax-neonfma-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64():
    112  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    115  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    116  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    117  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    118  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    119  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);
    120  vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c1, va5, 1);

D | 5x8inc-minmax-neon-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64():
    101  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    104  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    105  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    106  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    107  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    108  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);

D | 5x8inc-minmax-neonfma-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64():
    101  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    104  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
    105  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
    106  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
    107  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);
    108  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c1, va4, 1);

D | 6x8inc-minmax-neon-dup-ld64.c | in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64():
    118  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    127  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c1, vb0123c1);
    128  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c1, vb0123c1);
    129  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c1, vb0123c1);
    130  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c1, vb0123c1);
    131  vacc4x0123 = vmlaq_f32(vacc4x0123, va4c1, vb0123c1);
    132  vacc5x0123 = vmlaq_f32(vacc5x0123, va5c1, vb0123c1);

D | 6x8inc-minmax-neonfma-dup-ld64.c | in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64():
    118  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    127  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c1, vb0123c1);
    128  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c1, vb0123c1);
    129  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c1, vb0123c1);
    130  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c1, vb0123c1);
    131  vacc4x0123 = vfmaq_f32(vacc4x0123, va4c1, vb0123c1);
    132  vacc5x0123 = vfmaq_f32(vacc5x0123, va5c1, vb0123c1);

D | 4x8inc-minmax-neonfma-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64():
     90  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
     93  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c1, va0, 1);
     94  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c1, va1, 1);
     95  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c1, va2, 1);
     96  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c1, va3, 1);

D | 4x8inc-minmax-neon-dup-ld64.c | in xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64():
     94  const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;   (local)
    101  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c1, vb0123c1);
    102  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c1, vb0123c1);
    103  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c1, vb0123c1);
    104  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c1, vb0123c1);

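The gen-inc variants above (xnn_f32_gemminc_*) share these inner loops and, as the name suggests, differ in initializing the accumulators from a separate buffer rather than from the packed bias, which is why the vb0123c1 usage is identical. The remaining axis in the listing is neon vs neonfma: vmlaq_f32/vmlaq_lane_f32 are plain multiply-accumulates whose product may be rounded before the add, while vfmaq_f32/vfmaq_lane_f32 require FMA hardware and round once. A tiny stand-alone harness (hypothetical, not XNNPACK code) that runs both on the same inputs; where the mla form is left unfused, the low-order bits of the two results can differ:

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const float32x4_t va = vdupq_n_f32(3.0f);
      const float32x4_t vb = vdupq_n_f32(1.0f / 3.0f);  /* already rounded to float */
      float32x4_t vacc_mla = vdupq_n_f32(-1.0f);
      float32x4_t vacc_fma = vdupq_n_f32(-1.0f);
      vacc_mla = vmlaq_f32(vacc_mla, va, vb);  /* -1 + (3 * 1/3): product may round first */
      vacc_fma = vfmaq_f32(vacc_fma, va, vb);  /* fused: single rounding of the whole sum */
      printf("vmlaq_f32: %.10e\n", (double) vgetq_lane_f32(vacc_mla, 0));
      printf("vfmaq_f32: %.10e\n", (double) vgetq_lane_f32(vacc_fma, 0));
      return 0;
    }

Build for a NEON-capable target (e.g. gcc -O2 -march=armv8-a); note that fast floating-point contraction flags may let the compiler fuse the mla form as well.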