/external/XNNPACK/src/f32-spmm/gen/ |
D | 12x2-minmax-neonfma.c | 66 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() local 73 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 80 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 159 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() local 164 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 169 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 231 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() local 234 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 237 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma()
|
D | 16x2-minmax-neonfma.c | 71 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() local 80 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 89 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 176 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() local 181 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 186 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 248 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() local 251 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 254 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma()
|
D | 8x2-minmax-neonfma.c | 61 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() local 66 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 71 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 137 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() local 140 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 143 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma()
|
D | 32x2-minmax-neonfma.c | 92 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local 109 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 126 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 256 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local 265 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 274 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 357 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local 362 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 367 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 429 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local [all …]
|
D | 12x4-minmax-neonfma.c | 78 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() local 91 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 104 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 199 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() local 208 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 217 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 289 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() local 294 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 299 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma()
|
D | 16x4-minmax-neonfma.c | 87 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() local 104 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 121 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 226 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() local 235 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 244 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 316 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() local 321 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 326 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma()
|
D | 8x4-minmax-neonfma.c | 69 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() local 78 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 87 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 163 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() local 168 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 173 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma()
|
D | 32x4-minmax-neonfma.c | 124 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local 157 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 190 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 354 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local 371 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 388 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 489 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local 498 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 507 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 579 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local [all …]
|
D | 4x2-minmax-neonfma.c | 56 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() local 59 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 62 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma()
|
D | 4x4-minmax-neonfma.c | 60 float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() local 65 vout0123n0 = vmaxq_f32(vout0123n0, vmin); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 70 vst1q_f32(output + 0, vout0123n0); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma()
|