/external/XNNPACK/src/qu8-vaddc/gen/

  minmax-neon-ld64-x8.c   (in xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8())
     48  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     50  vout01234567 = vmax_u8(vout01234567, voutput_min);
     52  vout01234567 = vmin_u8(vout01234567, voutput_max);
     54  vst1_u8(output, vout01234567); output += 8;
     70  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     71  vout01234567 = vmax_u8(vout01234567, voutput_min);
     72  vout01234567 = vmin_u8(vout01234567, voutput_max);
     75  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
     76  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
     79  vst1_lane_u16((void*) output, vreinterpret_u16_u8(vout01234567), 0); output += 2;
    [all …]
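Note: every hit in this listing belongs to the same output epilogue that these generated NEON microkernels share. The sketch below is a stand-alone reconstruction of that pattern for the unsigned qu8 case (the helper name store_clamped_u8 is assumed, not code copied from XNNPACK), targeting ARM with <arm_neon.h>: saturating-narrow the 16-bit accumulators, clamp to the quantized output range, then either store all 8 bytes or drain a 4/2/1-byte remainder with lane stores, using vext_u8 to rotate already-stored lanes out of position 0.

    #include <arm_neon.h>
    #include <stddef.h>
    #include <stdint.h>

    // Hypothetical helper mirroring the epilogue of the qu8 ld64-x8 kernels.
    static void store_clamped_u8(uint8_t* output, size_t n, int16x8_t vacc01234567,
                                 uint8x8_t voutput_min, uint8x8_t voutput_max) {
      // Saturating narrow: signed 16-bit accumulators -> unsigned 8-bit lanes.
      uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);
      // Clamp to the quantized output range [output_min, output_max].
      vout01234567 = vmax_u8(vout01234567, voutput_min);
      vout01234567 = vmin_u8(vout01234567, voutput_max);

      if (n == 8) {
        vst1_u8(output, vout01234567);  // full 8-byte store (main loop body)
        return;
      }
      // Remainder path: store 4, 2, then 1 byte, rotating consumed lanes out
      // with vext_u8 so lane 0 always holds the next unwritten element.
      if (n & 4) {
        vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0);
        output += 4;
        vout01234567 = vext_u8(vout01234567, vout01234567, 4);
      }
      if (n & 2) {
        vst1_lane_u16((void*) output, vreinterpret_u16_u8(vout01234567), 0);
        output += 2;
        vout01234567 = vext_u8(vout01234567, vout01234567, 2);
      }
      if (n & 1) {
        vst1_lane_u8(output, vout01234567, 0);
      }
    }

The signed qs8/qc8 entries in the rest of the listing differ only in the intrinsics used: vqmovn_s16 for the narrow, vmax_s8/vmin_s8 for the clamp, and the s8 store/reinterpret variants.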
/external/XNNPACK/src/qs8-vaddc/gen/

  minmax-neon-ld64-x8.c   (in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8())
     48  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     50  vout01234567 = vmax_s8(vout01234567, voutput_min);
     52  vout01234567 = vmin_s8(vout01234567, voutput_max);
     54  vst1_s8(output, vout01234567); output += 8;
     70  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     71  vout01234567 = vmax_s8(vout01234567, voutput_min);
     72  vout01234567 = vmin_s8(vout01234567, voutput_max);
     75  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
     76  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
     79  vst1_lane_u16((void*) output, vreinterpret_u16_s8(vout01234567), 0); output += 2;
    [all …]

  minmax-neon-ld64-x16.c   (in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16())
     77  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     78  vout01234567 = vmax_s8(vout01234567, vget_low_s8(voutput_min));
     79  vout01234567 = vmin_s8(vout01234567, vget_low_s8(voutput_max));
     82  vst1_s8(output, vout01234567); output += 8;
     86  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
     87  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
     90  vst1_lane_u16((void*) output, vreinterpret_u16_s8(vout01234567), 0); output += 2;
     91  vout01234567 = vext_s8(vout01234567, vout01234567, 2);
     94  vst1_lane_s8(output, vout01234567, 0);
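Note on the x16 entry above: unlike the x8 kernels, its clamp bounds live in 16-lane registers, so the 8-lane remainder is clamped against their low halves via vget_low_s8. A minimal, hypothetical fragment of just that step (variable names assumed to match the listing, wrapped in a helper so it compiles on its own):

    #include <arm_neon.h>

    // Hypothetical fragment: clamp an 8-lane remainder when the min/max
    // vectors are the 16-lane registers used by the x16 kernel body.
    static int8x8_t clamp_remainder_s8(int16x8_t vacc01234567,
                                       int8x16_t voutput_min, int8x16_t voutput_max) {
      int8x8_t vout01234567 = vqmovn_s16(vacc01234567);               // saturating narrow
      vout01234567 = vmax_s8(vout01234567, vget_low_s8(voutput_min)); // lower clamp
      vout01234567 = vmin_s8(vout01234567, vget_low_s8(voutput_max)); // upper clamp
      return vout01234567;
    }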
/external/XNNPACK/src/qu8-vmulc/gen/

  minmax-fp32-neonv8-ld64-x8.c   (in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8())
     60  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     62  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     65  vout01234567 = vmax_u8(vout01234567, voutput_min);
     67  vout01234567 = vmin_u8(vout01234567, voutput_max);
     69  vst1_u8(output, vout01234567); output += 8;
     97  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     99  vout01234567 = vmax_u8(vout01234567, voutput_min);
    100  vout01234567 = vmin_u8(vout01234567, voutput_max);
    102  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
    103  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
    [all …]

  minmax-fp32-neon-ld64-x8.c   (in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld64_x8())
     62  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     64  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     67  vout01234567 = vmax_u8(vout01234567, voutput_min);
     69  vout01234567 = vmin_u8(vout01234567, voutput_max);
     71  vst1_u8(output, vout01234567); output += 8;
    101  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    103  vout01234567 = vmax_u8(vout01234567, voutput_min);
    104  vout01234567 = vmin_u8(vout01234567, voutput_max);
    106  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
    107  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
    [all …]

  minmax-rndnu-neon-ld64-x8.c   (in xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x8())
     61  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     63  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     66  vout01234567 = vmax_u8(vout01234567, voutput_min);
     68  vout01234567 = vmin_u8(vout01234567, voutput_max);
     70  vst1_u8(output, vout01234567); output += 8;
     98  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    100  vout01234567 = vmax_u8(vout01234567, voutput_min);
    101  vout01234567 = vmin_u8(vout01234567, voutput_max);
    103  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
    104  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
    [all …]
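Note: the vmul/vmulc, gavgpool, and dwconv entries show two [local] declarations of vout01234567 only two source lines apart (for example lines 60 and 62 of minmax-fp32-neonv8-ld64-x8.c). This is presumably an artifact of an architecture conditional in the generated sources, where the AArch64 and AArch32 branches happen to emit the same narrowing statement for an 8-lane tile, so the cross-referencer indexes both. A guessed reconstruction of that shape, not a quote of any file:

    #include <arm_neon.h>

    // Assumed shape only: __aarch64__ stands in for whatever architecture
    // macro the generated file actually tests; both branches narrow the
    // accumulator identically when there is a single 8-lane output vector.
    static uint8x8_t narrow_acc(int16x8_t vacc01234567) {
    #if defined(__aarch64__)
      uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);
    #else
      uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);
    #endif
      return vout01234567;
    }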
/external/XNNPACK/src/qs8-vmulc/gen/

  minmax-fp32-neonv8-ld64-x8.c   (in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8())
     60  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     62  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     65  vout01234567 = vmax_s8(vout01234567, voutput_min);
     67  vout01234567 = vmin_s8(vout01234567, voutput_max);
     69  vst1_s8(output, vout01234567); output += 8;
     97  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     99  vout01234567 = vmax_s8(vout01234567, voutput_min);
    100  vout01234567 = vmin_s8(vout01234567, voutput_max);
    102  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    103  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]

  minmax-fp32-neon-ld64-x8.c   (in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8())
     62  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     64  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     67  vout01234567 = vmax_s8(vout01234567, voutput_min);
     69  vout01234567 = vmin_s8(vout01234567, voutput_max);
     71  vst1_s8(output, vout01234567); output += 8;
    101  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    103  vout01234567 = vmax_s8(vout01234567, voutput_min);
    104  vout01234567 = vmin_s8(vout01234567, voutput_max);
    106  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    107  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]

  minmax-rndnu-neon-ld64-x8.c   (in xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x8())
     61  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     63  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     66  vout01234567 = vmax_s8(vout01234567, voutput_min);
     68  vout01234567 = vmin_s8(vout01234567, voutput_max);
     70  vst1_s8(output, vout01234567); output += 8;
     98  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    100  vout01234567 = vmax_s8(vout01234567, voutput_min);
    101  vout01234567 = vmin_s8(vout01234567, voutput_max);
    103  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    104  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]
/external/XNNPACK/src/qs8-vmul/gen/

  minmax-fp32-neonv8-ld64-x8.c   (in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8())
     60  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     62  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     65  vout01234567 = vmax_s8(vout01234567, voutput_min);
     67  vout01234567 = vmin_s8(vout01234567, voutput_max);
     69  vst1_s8(output, vout01234567); output += 8;
     99  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    101  vout01234567 = vmax_s8(vout01234567, voutput_min);
    102  vout01234567 = vmin_s8(vout01234567, voutput_max);
    104  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    105  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]

  minmax-rndnu-neon-ld64-x8.c   (in xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld64_x8())
     61  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     63  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     66  vout01234567 = vmax_s8(vout01234567, voutput_min);
     68  vout01234567 = vmin_s8(vout01234567, voutput_max);
     70  vst1_s8(output, vout01234567); output += 8;
    100  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    102  vout01234567 = vmax_s8(vout01234567, voutput_min);
    103  vout01234567 = vmin_s8(vout01234567, voutput_max);
    105  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    106  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]

  minmax-fp32-neon-ld64-x8.c   (in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8())
     62  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     64  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     67  vout01234567 = vmax_s8(vout01234567, voutput_min);
     69  vout01234567 = vmin_s8(vout01234567, voutput_max);
     71  vst1_s8(output, vout01234567); output += 8;
    103  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    105  vout01234567 = vmax_s8(vout01234567, voutput_min);
    106  vout01234567 = vmin_s8(vout01234567, voutput_max);
    108  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    109  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]
/external/XNNPACK/src/qu8-vmul/gen/

  minmax-fp32-neonv8-ld64-x8.c   (in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8())
     60  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     62  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     65  vout01234567 = vmax_u8(vout01234567, voutput_min);
     67  vout01234567 = vmin_u8(vout01234567, voutput_max);
     69  vst1_u8(output, vout01234567); output += 8;
     99  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    101  vout01234567 = vmax_u8(vout01234567, voutput_min);
    102  vout01234567 = vmin_u8(vout01234567, voutput_max);
    104  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
    105  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
    [all …]

  minmax-rndnu-neon-ld64-x8.c   (in xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld64_x8())
     61  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     63  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     66  vout01234567 = vmax_u8(vout01234567, voutput_min);
     68  vout01234567 = vmin_u8(vout01234567, voutput_max);
     70  vst1_u8(output, vout01234567); output += 8;
    100  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    102  vout01234567 = vmax_u8(vout01234567, voutput_min);
    103  vout01234567 = vmin_u8(vout01234567, voutput_max);
    105  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
    106  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
    [all …]

  minmax-fp32-neon-ld64-x8.c   (in xnn_qu8_vmul_minmax_fp32_ukernel__neon_ld64_x8())
     62  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     64  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     67  vout01234567 = vmax_u8(vout01234567, voutput_min);
     69  vout01234567 = vmin_u8(vout01234567, voutput_max);
     71  vst1_u8(output, vout01234567); output += 8;
    103  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    105  vout01234567 = vmax_u8(vout01234567, voutput_min);
    106  vout01234567 = vmin_u8(vout01234567, voutput_max);
    108  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
    109  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
    [all …]
/external/XNNPACK/src/qu8-vadd/gen/

  minmax-neon-ld64-x8.c   (in xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8())
     51  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     53  vout01234567 = vmax_u8(vout01234567, voutput_min);
     55  vout01234567 = vmin_u8(vout01234567, voutput_max);
     57  vst1_u8(output, vout01234567); output += 8;
     78  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
     79  vout01234567 = vmax_u8(vout01234567, voutput_min);
     80  vout01234567 = vmin_u8(vout01234567, voutput_max);
     83  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
     84  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
     87  vst1_lane_u16((void*) output, vreinterpret_u16_u8(vout01234567), 0); output += 2;
    [all …]
/external/XNNPACK/src/qs8-vadd/gen/

  minmax-neon-ld64-x8.c   (in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8())
     51  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     53  vout01234567 = vmax_s8(vout01234567, voutput_min);
     55  vout01234567 = vmin_s8(vout01234567, voutput_max);
     57  vst1_s8(output, vout01234567); output += 8;
     78  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
     79  vout01234567 = vmax_s8(vout01234567, voutput_min);
     80  vout01234567 = vmin_s8(vout01234567, voutput_max);
     83  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
     84  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
     87  vst1_lane_u16((void*) output, vreinterpret_u16_s8(vout01234567), 0); output += 2;
    [all …]
/external/XNNPACK/src/qs8-gavgpool/gen/

  7x-minmax-fp32-neonv8-c8.c   (in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c8())
    100  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    102  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    105  vout01234567 = vmax_s8(vout01234567, voutput_min);
    107  vout01234567 = vmin_s8(vout01234567, voutput_max);
    109  vst1_s8(output, vout01234567); output += 8;
    147  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    148  vout01234567 = vmax_s8(vout01234567, voutput_min);
    149  vout01234567 = vmin_s8(vout01234567, voutput_max);
    152  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    153  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]

  7x-minmax-rndnu-neon-c8.c   (in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7x__neon_c8())
    101  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    103  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    106  vout01234567 = vmax_s8(vout01234567, voutput_min);
    108  vout01234567 = vmin_s8(vout01234567, voutput_max);
    110  vst1_s8(output, vout01234567); output += 8;
    148  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    149  vout01234567 = vmax_s8(vout01234567, voutput_min);
    150  vout01234567 = vmin_s8(vout01234567, voutput_max);
    153  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    154  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]

  7x-minmax-fp32-neon-c8.c   (in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neon_c8())
    102  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    104  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    107  vout01234567 = vmax_s8(vout01234567, voutput_min);
    109  vout01234567 = vmin_s8(vout01234567, voutput_max);
    111  vst1_s8(output, vout01234567); output += 8;
    151  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    152  vout01234567 = vmax_s8(vout01234567, voutput_min);
    153  vout01234567 = vmin_s8(vout01234567, voutput_max);
    156  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    157  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]
/external/XNNPACK/src/qc8-dwconv/gen/

  up8x3-minmax-fp32-neonv8-mla8-ld64.c   (in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neonv8_mla8_ld64())
     97  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    103  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    106  vout01234567 = vmax_s8(vout01234567, voutput_min);
    108  vout01234567 = vmin_s8(vout01234567, voutput_max);
    110  vst1_s8(output, vout01234567); output += 8;
    155  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    156  vout01234567 = vmax_s8(vout01234567, voutput_min);
    157  vout01234567 = vmin_s8(vout01234567, voutput_max);
    160  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    161  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]

  up8x3-minmax-fp32-neon-mla8-ld64.c   (in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x3__neon_mla8_ld64())
     99  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    104  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    107  vout01234567 = vmax_s8(vout01234567, voutput_min);
    109  vout01234567 = vmin_s8(vout01234567, voutput_max);
    111  vst1_s8(output, vout01234567); output += 8;
    158  int8x8_t vout01234567 = vqmovn_s16(vacc01234567);   [local]
    159  vout01234567 = vmax_s8(vout01234567, voutput_min);
    160  vout01234567 = vmin_s8(vout01234567, voutput_max);
    163  vst1_lane_u32((void*) output, vreinterpret_u32_s8(vout01234567), 0); output += 4;
    164  vout01234567 = vext_s8(vout01234567, vout01234567, 4);
    [all …]
/external/XNNPACK/src/qu8-gavgpool/gen/

  7x-minmax-rndnu-neon-c8.c   (in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7x__neon_c8())
    101  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    103  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    106  vout01234567 = vmax_u8(vout01234567, voutput_min);
    108  vout01234567 = vmin_u8(vout01234567, voutput_max);
    110  vst1_u8(output, vout01234567); output += 8;
    148  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    149  vout01234567 = vmax_u8(vout01234567, voutput_min);
    150  vout01234567 = vmin_u8(vout01234567, voutput_max);
    153  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
    154  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
    [all …]

  7x-minmax-fp32-neonv8-c8.c   (in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c8())
    100  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    102  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    105  vout01234567 = vmax_u8(vout01234567, voutput_min);
    107  vout01234567 = vmin_u8(vout01234567, voutput_max);
    109  vst1_u8(output, vout01234567); output += 8;
    147  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    148  vout01234567 = vmax_u8(vout01234567, voutput_min);
    149  vout01234567 = vmin_u8(vout01234567, voutput_max);
    152  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
    153  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
    [all …]

  7x-minmax-fp32-neon-c8.c   (in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__neon_c8())
    102  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    104  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    107  vout01234567 = vmax_u8(vout01234567, voutput_min);
    109  vout01234567 = vmin_u8(vout01234567, voutput_max);
    111  vst1_u8(output, vout01234567); output += 8;
    151  uint8x8_t vout01234567 = vqmovun_s16(vacc01234567);   [local]
    152  vout01234567 = vmax_u8(vout01234567, voutput_min);
    153  vout01234567 = vmin_u8(vout01234567, voutput_max);
    156  vst1_lane_u32((void*) output, vreinterpret_u32_u8(vout01234567), 0); output += 4;
    157  vout01234567 = vext_u8(vout01234567, vout01234567, 4);
    [all …]