/external/XNNPACK/src/f32-spmm/gen/ |
D | 32x2-minmax-neonfma.c | 50 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local 51 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 52 float32x4_t vacc89ABn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 53 float32x4_t vaccCDEFn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 54 float32x4_t vaccGHIJn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 55 float32x4_t vaccKLMNn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 56 float32x4_t vaccOPQRn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 57 float32x4_t vaccSTUVn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 82 vacc0123n1 = vfmaq_lane_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 100 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() [all …]
|
D | 16x2-minmax-neonfma.c | 46 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() local 47 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 48 float32x4_t vacc89ABn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 49 float32x4_t vaccCDEFn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 65 vacc0123n1 = vfmaq_lane_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 75 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 160 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() local 161 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 172 vacc0123n1 = vfmaq_lane_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 178 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() [all …]
|
D | 12x2-minmax-neonfma.c | 45 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() local 46 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 47 float32x4_t vacc89ABn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 61 vacc0123n1 = vfmaq_lane_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 69 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 143 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() local 144 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 155 vacc0123n1 = vfmaq_lane_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 161 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 219 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() local [all …]
|
D | 32x4-minmax-neonfma.c | 50 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local 51 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 52 float32x4_t vacc89ABn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 53 float32x4_t vaccCDEFn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 54 float32x4_t vaccGHIJn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 55 float32x4_t vaccKLMNn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 56 float32x4_t vaccOPQRn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 57 float32x4_t vaccSTUVn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 98 vacc0123n1 = vfmaq_laneq_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 132 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() [all …]
|
D | 8x2-minmax-neonfma.c | 44 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() local 45 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 57 vacc0123n1 = vfmaq_lane_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 63 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 125 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() local 134 vacc0123n1 = vfmaq_lane_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 138 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma()
|
D | 12x4-minmax-neonfma.c | 45 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() local 46 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 47 float32x4_t vacc89ABn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 67 vacc0123n1 = vfmaq_laneq_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 81 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 175 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() local 176 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 191 vacc0123n1 = vfmaq_laneq_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 201 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 273 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() local [all …]
|
D | 16x4-minmax-neonfma.c | 46 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() local 47 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 48 float32x4_t vacc89ABn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 49 float32x4_t vaccCDEFn1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 73 vacc0123n1 = vfmaq_laneq_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 91 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 202 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() local 203 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 218 vacc0123n1 = vfmaq_laneq_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 228 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() [all …]
|
D | 8x4-minmax-neonfma.c | 44 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() local 45 float32x4_t vacc4567n1 = vacc0123n1; in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 61 vacc0123n1 = vfmaq_laneq_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 71 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 147 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() local 158 vacc0123n1 = vfmaq_laneq_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 164 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma()
|
D | 4x2-minmax-neonfma.c | 43 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() local 53 vacc0123n1 = vfmaq_lane_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 57 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma()
|
D | 4x4-minmax-neonfma.c | 43 float32x4_t vacc0123n1 = vld1q_dup_f32(w); w += 1; in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() local 55 vacc0123n1 = vfmaq_laneq_f32(vacc0123n1, vi0123, vw, 1); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 61 float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma()
|