/external/XNNPACK/src/f32-igemm/gen/

4x4-minmax-neon-lane-ld64.c  (vb0123c0 is a local in xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64)
      95  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      97  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      98  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
      99  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
     100  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);

4x4-minmax-neonfma-lane-ld64.c  (vb0123c0 is a local in xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64)
      95  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      97  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      98  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
      99  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
     100  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);

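Note on the lane kernels listed above and below: vb0123c0 holds four packed B (weight) values for column step 0, and each row's accumulator is updated by broadcasting one A element through the lane form of the multiply-accumulate intrinsic. A minimal standalone sketch of that single step follows; names are illustrative, not the XNNPACK sources.

    #include <arm_neon.h>

    // One lane step: acc += b * a[0], with the A pair held in a 64-bit
    // float32x2_t (hence the "ld64" suffix on the kernel names).
    static inline float32x4_t mac_lane0(float32x4_t acc, float32x4_t b, float32x2_t a) {
    #if defined(__aarch64__)
      // neonfma kernels: fused multiply-add, single rounding.
      return vfmaq_lane_f32(acc, b, a, 0);
    #else
      // plain neon kernels: separate multiply and add.
      return vmlaq_lane_f32(acc, b, a, 0);
    #endif
    }
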
6x8-minmax-neonfma-lane-ld64.c  (vb0123c0 is a local in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64)
     123  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     126  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
     127  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
     128  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
     129  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
     130  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);
     131  vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c0, va5, 0);

6x8-minmax-neon-lane-ld64.c  (vb0123c0 is a local in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64)
     123  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     126  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
     127  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
     128  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
     129  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
     130  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);
     131  vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c0, va5, 0);

6x8-minmax-neonfma-dup-ld64.c  (vb0123c0 is a local in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64)
     123  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     132  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);
     133  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c0, vb0123c0);
     134  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c0, vb0123c0);
     135  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c0, vb0123c0);
     136  vacc4x0123 = vfmaq_f32(vacc4x0123, va4c0, vb0123c0);
     137  vacc5x0123 = vfmaq_f32(vacc5x0123, va5c0, vb0123c0);

6x8-minmax-neon-dup-ld64.c  (vb0123c0 is a local in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64)
     123  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     132  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);
     133  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0);
     134  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0);
     135  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0);
     136  vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0);
     137  vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0);

4x8-minmax-neon-dup-ld64.c  (vb0123c0 is a local in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64)
      99  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     106  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);
     107  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0);
     108  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0);
     109  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0);

4x8-minmax-neonfma-dup-ld64.c  (vb0123c0 is a local in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64)
      99  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     106  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);
     107  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c0, vb0123c0);
     108  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c0, vb0123c0);
     109  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c0, vb0123c0);

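The dup kernels above reach the same contraction differently: the A element is first broadcast into a full vector with vdupq_lane_f32 (the va0c0 .. va5c0 locals), then a plain vector MLA/FMA is applied. Both flavors exist because vfmaq_lane_f32 is an AArch64-only intrinsic, so ARMv7 targets with FMA fall back to the dup form. A minimal sketch with illustrative names:

    #include <arm_neon.h>

    // One dup step: broadcast a[0] to all four lanes, then acc += a_c0 * b.
    static inline float32x4_t mac_dup0(float32x4_t acc, float32x4_t b, float32x2_t a) {
      const float32x4_t a_c0 = vdupq_lane_f32(a, 0);  // explicit broadcast
    #if defined(__ARM_FEATURE_FMA)
      return vfmaq_f32(acc, a_c0, b);  // neonfma dup kernels
    #else
      return vmlaq_f32(acc, a_c0, b);  // plain neon dup kernels
    #endif
    }
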
/external/XNNPACK/src/f32-gemm/gen/

6x8-minmax-neonfma-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64)
      95  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      98  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      99  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
     100  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
     101  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
     102  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);
     103  vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c0, va5, 0);

6x8-minmax-neon-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64)
      95  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      98  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      99  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
     100  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
     101  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
     102  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);
     103  vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c0, va5, 0);

5x8-minmax-neonfma-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64)
      86  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      89  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      90  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
      91  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
      92  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
      93  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);

5x8-minmax-neon-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64)
      86  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      89  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      90  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
      91  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
      92  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
      93  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);

6x8-minmax-neon-dup-ld64.c  (vb0123c0 is a local in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64)
      95  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     104  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);
     105  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0);
     106  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0);
     107  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0);
     108  vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0);
     109  vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0);

6x8-minmax-neonfma-dup-ld64.c  (vb0123c0 is a local in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64)
      95  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     104  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);
     105  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c0, vb0123c0);
     106  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c0, vb0123c0);
     107  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c0, vb0123c0);
     108  vacc4x0123 = vfmaq_f32(vacc4x0123, va4c0, vb0123c0);
     109  vacc5x0123 = vfmaq_f32(vacc5x0123, va5c0, vb0123c0);

4x8-minmax-neon-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64)
      77  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      80  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      81  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
      82  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
      83  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);

4x8-minmax-neonfma-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64)
      77  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      80  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      81  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
      82  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
      83  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);

4x8-minmax-neonfma-dup-ld64.c  (vb0123c0 is a local in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64)
      77  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      84  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);
      85  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c0, vb0123c0);
      86  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c0, vb0123c0);
      87  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c0, vb0123c0);

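All of the ld64 variants share one main-loop shape: 64 bits of A (two floats) per row are loaded at once and consumed lane by lane against consecutive B tiles, which is where the c0 suffix on vb0123c0 comes from (column step 0 of the pair; a matching vb0123c1 tile consumes lane 1). A hypothetical single-row, 4-wide reduction of that loop, assuming kc is in bytes as in the real kernels:

    #include <arm_neon.h>
    #include <stddef.h>

    // Sketch only: the real kernels unroll 4 to 6 rows and 8 columns.
    void gemm_1x4_ld64_sketch(size_t kc, const float* a, const float* w,
                              float* c, float32x4_t vacc) {
      size_t k = kc;
      for (; k >= 2 * sizeof(float); k -= 2 * sizeof(float)) {
        const float32x2_t va = vld1_f32(a); a += 2;      // 64-bit A load
        const float32x4_t vb_c0 = vld1q_f32(w); w += 4;  // B tile for lane 0
        const float32x4_t vb_c1 = vld1q_f32(w); w += 4;  // B tile for lane 1
        vacc = vmlaq_lane_f32(vacc, vb_c0, va, 0);
        vacc = vmlaq_lane_f32(vacc, vb_c1, va, 1);
      }
      if (k != 0) {                                      // odd trailing column
        const float32x4_t va = vld1q_dup_f32(a);
        const float32x4_t vb = vld1q_f32(w);
        vacc = vmlaq_f32(vacc, va, vb);
      }
      vst1q_f32(c, vacc);
    }
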
/external/XNNPACK/src/f32-gemm/gen-inc/

6x8inc-minmax-neon-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64)
      97  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     100  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
     101  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
     102  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
     103  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
     104  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);
     105  vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c0, va5, 0);

6x8inc-minmax-neonfma-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64)
      97  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     100  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
     101  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
     102  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
     103  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
     104  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);
     105  vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c0, va5, 0);

5x8inc-minmax-neon-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64)
      88  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      91  vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      92  vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
      93  vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
      94  vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
      95  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);

5x8inc-minmax-neonfma-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64)
      88  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      91  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      92  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
      93  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
      94  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);
      95  vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c0, va4, 0);

6x8inc-minmax-neon-dup-ld64.c  (vb0123c0 is a local in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64)
      97  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     106  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);
     107  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0);
     108  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0);
     109  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0);
     110  vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0);
     111  vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0);

6x8inc-minmax-neonfma-dup-ld64.c  (vb0123c0 is a local in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64)
      97  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
     106  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);
     107  vacc1x0123 = vfmaq_f32(vacc1x0123, va1c0, vb0123c0);
     108  vacc2x0123 = vfmaq_f32(vacc2x0123, va2c0, vb0123c0);
     109  vacc3x0123 = vfmaq_f32(vacc3x0123, va3c0, vb0123c0);
     110  vacc4x0123 = vfmaq_f32(vacc4x0123, va4c0, vb0123c0);
     111  vacc5x0123 = vfmaq_f32(vacc5x0123, va5c0, vb0123c0);

4x8inc-minmax-neonfma-lane-ld64.c  (vb0123c0 is a local in xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64)
      79  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      82  vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c0, va0, 0);
      83  vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c0, va1, 0);
      84  vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c0, va2, 0);
      85  vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c0, va3, 0);

4x8inc-minmax-neon-dup-ld64.c  (vb0123c0 is a local in xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64)
      79  const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;
      86  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);
      87  vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0);
      88  vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0);
      89  vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0);

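The gen-inc listings mirror the gen ones in their inner loops; the difference implied by the gemminc naming (an assumption here, not visible in this dump) lies only in how the accumulators are seeded before this loop runs: from a caller-provided partial-result buffer instead of the bias words packed at the head of w. A hedged sketch of that distinction:

    #include <arm_neon.h>
    #include <stddef.h>

    // GEMM:    accumulators seeded from bias prepended to the packed weights.
    // GEMMINC: accumulators seeded from a partial-result buffer `acc`.
    // Illustrative helper, not an XNNPACK API.
    static inline float32x4_t init_acc(const float** w, const float** acc) {
      if (acc != NULL && *acc != NULL) {
        const float32x4_t v = vld1q_f32(*acc); *acc += 4;  // resume partial sums
        return v;
      }
      const float32x4_t v = vld1q_f32(*w); *w += 4;        // fresh start from bias
      return v;
    }
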