/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-wasmsimd-x32.c | 51 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 52 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 53 …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 54 …const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vaccCDEF, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 55 …const v128_t vremGHIJ = wasm_i32x4_add(wasm_v128_and(vaccGHIJ, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 56 …const v128_t vremKLMN = wasm_i32x4_add(wasm_v128_and(vaccKLMN, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 57 …const v128_t vremOPQR = wasm_i32x4_add(wasm_v128_and(vaccOPQR, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 58 …const v128_t vremSTUV = wasm_i32x4_add(wasm_v128_and(vaccSTUV, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 60 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() 61 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() [all …]
|
D | minmax-wasmsimd-x24.c | 48 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 49 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 50 …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 51 …const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vaccCDEF, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 52 …const v128_t vremGHIJ = wasm_i32x4_add(wasm_v128_and(vaccGHIJ, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 53 …const v128_t vremKLMN = wasm_i32x4_add(wasm_v128_and(vaccKLMN, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 55 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 56 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 57 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() 58 …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() [all …]
|
D | minmax-wasmsimd-x16.c | 45 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 46 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 47 …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 48 …const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vaccCDEF, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 50 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 51 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 52 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 53 …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 75 … vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(vacc0123, 31)); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() 76 … vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc4567, 31)); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() [all …]
|
D | minmax-wasmsimd-x8.c | 42 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 43 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 45 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 46 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 66 … vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(vacc0123, 31)); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 67 … vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc4567, 31)); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 69 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() 70 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-wasmsimd-x32.c | 64 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 65 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 66 …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 67 …const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vaccCDEF, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 68 …const v128_t vremGHIJ = wasm_i32x4_add(wasm_v128_and(vaccGHIJ, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 69 …const v128_t vremKLMN = wasm_i32x4_add(wasm_v128_and(vaccKLMN, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 70 …const v128_t vremOPQR = wasm_i32x4_add(wasm_v128_and(vaccOPQR, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 71 …const v128_t vremSTUV = wasm_i32x4_add(wasm_v128_and(vaccSTUV, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 73 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 74 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() [all …]
|
D | minmax-wasmsimd-x24.c | 58 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 59 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 60 …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 61 …const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vaccCDEF, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 62 …const v128_t vremGHIJ = wasm_i32x4_add(wasm_v128_and(vaccGHIJ, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 63 …const v128_t vremKLMN = wasm_i32x4_add(wasm_v128_and(vaccKLMN, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 65 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 66 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 67 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() 68 …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() [all …]
|
D | minmax-wasmsimd-x16.c | 52 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 53 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 54 …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 55 …const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vaccCDEF, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 57 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 58 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 59 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 60 …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 87 … vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(vacc0123, 31)); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() 88 … vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc4567, 31)); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() [all …]
|
D | minmax-wasmsimd-x8.c | 46 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 47 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 49 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 50 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 74 … vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(vacc0123, 31)); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 75 … vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc4567, 31)); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 77 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() 78 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up24x9-minmax-wasmsimd-mul16.c | 264 const v128_t vsign0123 = wasm_i32x4_shr(vacc0123, 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 265 const v128_t vsign4567 = wasm_i32x4_shr(vacc4567, 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 266 const v128_t vsign89AB = wasm_i32x4_shr(vacc89AB, 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 267 const v128_t vsignCDEF = wasm_i32x4_shr(vaccCDEF, 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 268 const v128_t vsignGHIJ = wasm_i32x4_shr(vaccGHIJ, 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 269 const v128_t vsignKLMN = wasm_i32x4_shr(vaccKLMN, 31); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 307 …m0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, … in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 308 …m4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, … in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 309 …m89AB = wasm_i32x4_add(wasm_v128_and(vq31prod89AB, vremainder_mask), wasm_i32x4_shr(vq31prod89AB, … in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() 310 …mCDEF = wasm_i32x4_add(wasm_v128_and(vq31prodCDEF, vremainder_mask), wasm_i32x4_shr(vq31prodCDEF, … in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() [all …]
|
D | up16x9-minmax-wasmsimd-mul16.c | 217 const v128_t vsign0123 = wasm_i32x4_shr(vacc0123, 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 218 const v128_t vsign4567 = wasm_i32x4_shr(vacc4567, 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 219 const v128_t vsign89AB = wasm_i32x4_shr(vacc89AB, 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 220 const v128_t vsignCDEF = wasm_i32x4_shr(vaccCDEF, 31); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 248 …m0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, … in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 249 …m4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, … in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 250 …m89AB = wasm_i32x4_add(wasm_v128_and(vq31prod89AB, vremainder_mask), wasm_i32x4_shr(vq31prod89AB, … in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 251 …mCDEF = wasm_i32x4_add(wasm_v128_and(vq31prodCDEF, vremainder_mask), wasm_i32x4_shr(vq31prodCDEF, … in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 255 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() 256 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() [all …]
|
D | up8x9-minmax-wasmsimd-mul16.c | 170 const v128_t vsign0123 = wasm_i32x4_shr(vacc0123, 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 171 const v128_t vsign4567 = wasm_i32x4_shr(vacc4567, 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 189 …m0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, … in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 190 …m4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, … in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 194 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 195 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold… in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 286 const v128_t vsign0123 = wasm_i32x4_shr(vacc0123, 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 287 const v128_t vsign4567 = wasm_i32x4_shr(vacc4567, 31); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 305 …m0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, … in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() 306 …m4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, … in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() [all …]
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | wasmsimd-p5-div-x24.c | 152 vf0123 = wasm_v128_bitselect(vf0123, wasm_f32x4_sub(vone, vf0123), wasm_i32x4_shr(vx0123, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 153 vf4567 = wasm_v128_bitselect(vf4567, wasm_f32x4_sub(vone, vf4567), wasm_i32x4_shr(vx4567, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 154 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 155 vfCDEF = wasm_v128_bitselect(vfCDEF, wasm_f32x4_sub(vone, vfCDEF), wasm_i32x4_shr(vxCDEF, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 156 vfGHIJ = wasm_v128_bitselect(vfGHIJ, wasm_f32x4_sub(vone, vfGHIJ), wasm_i32x4_shr(vxGHIJ, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 157 vfKLMN = wasm_v128_bitselect(vfKLMN, wasm_f32x4_sub(vone, vfKLMN), wasm_i32x4_shr(vxKLMN, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 191 vf = wasm_v128_bitselect(vf, wasm_f32x4_sub(vone, vf), wasm_i32x4_shr(vx, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24() 219 vf = wasm_v128_bitselect(vf, wasm_f32x4_sub(vone, vf), wasm_i32x4_shr(vx, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24()
|
D | wasmsimd-p5-div-x20.c | 136 vf0123 = wasm_v128_bitselect(vf0123, wasm_f32x4_sub(vone, vf0123), wasm_i32x4_shr(vx0123, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 137 vf4567 = wasm_v128_bitselect(vf4567, wasm_f32x4_sub(vone, vf4567), wasm_i32x4_shr(vx4567, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 138 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 139 vfCDEF = wasm_v128_bitselect(vfCDEF, wasm_f32x4_sub(vone, vfCDEF), wasm_i32x4_shr(vxCDEF, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 140 vfGHIJ = wasm_v128_bitselect(vfGHIJ, wasm_f32x4_sub(vone, vfGHIJ), wasm_i32x4_shr(vxGHIJ, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 173 vf = wasm_v128_bitselect(vf, wasm_f32x4_sub(vone, vf), wasm_i32x4_shr(vx, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 201 vf = wasm_v128_bitselect(vf, wasm_f32x4_sub(vone, vf), wasm_i32x4_shr(vx, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
|
D | wasmsimd-p5-div-x16.c | 120 vf0123 = wasm_v128_bitselect(vf0123, wasm_f32x4_sub(vone, vf0123), wasm_i32x4_shr(vx0123, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 121 vf4567 = wasm_v128_bitselect(vf4567, wasm_f32x4_sub(vone, vf4567), wasm_i32x4_shr(vx4567, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 122 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 123 vfCDEF = wasm_v128_bitselect(vfCDEF, wasm_f32x4_sub(vone, vfCDEF), wasm_i32x4_shr(vxCDEF, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 155 vf = wasm_v128_bitselect(vf, wasm_f32x4_sub(vone, vf), wasm_i32x4_shr(vx, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 183 vf = wasm_v128_bitselect(vf, wasm_f32x4_sub(vone, vf), wasm_i32x4_shr(vx, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
|
D | wasmsimd-p5-div-x12.c | 104 vf0123 = wasm_v128_bitselect(vf0123, wasm_f32x4_sub(vone, vf0123), wasm_i32x4_shr(vx0123, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() 105 vf4567 = wasm_v128_bitselect(vf4567, wasm_f32x4_sub(vone, vf4567), wasm_i32x4_shr(vx4567, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() 106 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() 137 vf = wasm_v128_bitselect(vf, wasm_f32x4_sub(vone, vf), wasm_i32x4_shr(vx, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() 165 vf = wasm_v128_bitselect(vf, wasm_f32x4_sub(vone, vf), wasm_i32x4_shr(vx, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12()
|
/external/XNNPACK/src/qs8-vaddc/ |
D | wasmsimd.c.in | 45 … = wasm_i32x4_add(wasm_v128_and(vacc${ABC[N:N+4]}, vremainder_mask), wasm_i32x4_shr(vacc${ABC[N:N+… 48 …vacc${ABC[N:N+4]} = wasm_i32x4_sub(wasm_i32x4_shr(vacc${ABC[N:N+4]}, vshift), wasm_i32x4_gt(vrem${… 91 …]} = wasm_i32x4_add(wasm_v128_and(vacc${ABC[0:4]}, vremainder_mask), wasm_i32x4_shr(vacc${ABC[0:4]… 92 …]} = wasm_i32x4_add(wasm_v128_and(vacc${ABC[4:8]}, vremainder_mask), wasm_i32x4_shr(vacc${ABC[4:8]… 94 …vacc${ABC[0:4]} = wasm_i32x4_sub(wasm_i32x4_shr(vacc${ABC[0:4]}, vshift), wasm_i32x4_gt(vrem${ABC[… 95 …vacc${ABC[4:8]} = wasm_i32x4_sub(wasm_i32x4_shr(vacc${ABC[4:8]}, vshift), wasm_i32x4_gt(vrem${ABC[…
|
/external/XNNPACK/src/qs8-dwconv/ |
D | unipass-wasmsimd-mul16.c.in | 68 const v128_t vsign${ABC[C:C+4]} = wasm_i32x4_shr(vacc${ABC[C:C+4]}, 31); 84 …asm_i32x4_add(wasm_v128_and(vq31prod${ABC[C:C+4]}, vremainder_mask), wasm_i32x4_shr(vq31prod${ABC[… 89 …vacc${ABC[C:C+4]} = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod${ABC[C:C+4]}, vshift), wasm_i32x4_gt(vr… 143 const v128_t vsign${ABC[0:4]} = wasm_i32x4_shr(vacc${ABC[0:4]}, 31); 144 const v128_t vsign${ABC[4:8]} = wasm_i32x4_shr(vacc${ABC[4:8]}, 31); 162 … wasm_i32x4_add(wasm_v128_and(vq31prod${ABC[0:4]}, vremainder_mask), wasm_i32x4_shr(vq31prod${ABC[… 163 … wasm_i32x4_add(wasm_v128_and(vq31prod${ABC[4:8]}, vremainder_mask), wasm_i32x4_shr(vq31prod${ABC[… 167 …vacc${ABC[0:4]} = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod${ABC[0:4]}, vshift), wasm_i32x4_gt(vrem${… 168 …vacc${ABC[4:8]} = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod${ABC[4:8]}, vshift), wasm_i32x4_gt(vrem${…
|
/external/XNNPACK/src/qs8-requantization/ |
D | q31-wasmsimd.c | 104 wasm_i32x4_sub(wasm_i32x4_shr(x_q31product, shift), wasm_i32x4_gt(x_remainder, vthreshold)); in xnn_qs8_requantize_q31__wasmsimd() 106 wasm_i32x4_sub(wasm_i32x4_shr(y_q31product, shift), wasm_i32x4_gt(y_remainder, vthreshold)); in xnn_qs8_requantize_q31__wasmsimd() 108 wasm_i32x4_sub(wasm_i32x4_shr(z_q31product, shift), wasm_i32x4_gt(z_remainder, vthreshold)); in xnn_qs8_requantize_q31__wasmsimd() 110 wasm_i32x4_sub(wasm_i32x4_shr(w_q31product, shift), wasm_i32x4_gt(w_remainder, vthreshold)); in xnn_qs8_requantize_q31__wasmsimd()
|
/external/XNNPACK/src/qu8-requantization/ |
D | q31-wasmsimd.c | 104 wasm_i32x4_sub(wasm_i32x4_shr(x_q31product, shift), wasm_i32x4_gt(x_remainder, vthreshold)); in xnn_qu8_requantize_q31__wasmsimd() 106 wasm_i32x4_sub(wasm_i32x4_shr(y_q31product, shift), wasm_i32x4_gt(y_remainder, vthreshold)); in xnn_qu8_requantize_q31__wasmsimd() 108 wasm_i32x4_sub(wasm_i32x4_shr(z_q31product, shift), wasm_i32x4_gt(z_remainder, vthreshold)); in xnn_qu8_requantize_q31__wasmsimd() 110 wasm_i32x4_sub(wasm_i32x4_shr(w_q31product, shift), wasm_i32x4_gt(w_remainder, vthreshold)); in xnn_qu8_requantize_q31__wasmsimd()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-arm-rr2-p6-x24.c | 158 const v128_t vsignm0123 = wasm_i32x4_shr(vx0123, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 160 const v128_t vsignm4567 = wasm_i32x4_shr(vx4567, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 162 const v128_t vsignm89AB = wasm_i32x4_shr(vx89AB, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 164 const v128_t vsignmCDEF = wasm_i32x4_shr(vxCDEF, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 166 const v128_t vsignmGHIJ = wasm_i32x4_shr(vxGHIJ, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 168 const v128_t vsignmKLMN = wasm_i32x4_shr(vxKLMN, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 210 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 240 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
|
D | velu-wasmsimd-arm-rr2-p6-x20.c | 142 const v128_t vsignm0123 = wasm_i32x4_shr(vx0123, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 144 const v128_t vsignm4567 = wasm_i32x4_shr(vx4567, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 146 const v128_t vsignm89AB = wasm_i32x4_shr(vx89AB, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 148 const v128_t vsignmCDEF = wasm_i32x4_shr(vxCDEF, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 150 const v128_t vsignmGHIJ = wasm_i32x4_shr(vxGHIJ, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 190 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 220 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
|
D | velu-wasmsimd-arm-rr2-p6-x16.c | 126 const v128_t vsignm0123 = wasm_i32x4_shr(vx0123, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 128 const v128_t vsignm4567 = wasm_i32x4_shr(vx4567, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 130 const v128_t vsignm89AB = wasm_i32x4_shr(vx89AB, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 132 const v128_t vsignmCDEF = wasm_i32x4_shr(vxCDEF, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 170 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 200 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
|
D | velu-wasmsimd-x86-rr2-p6-x24.c | 176 const v128_t vsignm0123 = wasm_i32x4_shr(vx0123, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 178 const v128_t vsignm4567 = wasm_i32x4_shr(vx4567, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 180 const v128_t vsignm89AB = wasm_i32x4_shr(vx89AB, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 182 const v128_t vsignmCDEF = wasm_i32x4_shr(vxCDEF, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 184 const v128_t vsignmGHIJ = wasm_i32x4_shr(vxGHIJ, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 186 const v128_t vsignmKLMN = wasm_i32x4_shr(vxKLMN, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 231 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 264 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
|
D | velu-wasmsimd-x86-rr2-p6-x20.c | 157 const v128_t vsignm0123 = wasm_i32x4_shr(vx0123, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 159 const v128_t vsignm4567 = wasm_i32x4_shr(vx4567, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 161 const v128_t vsignm89AB = wasm_i32x4_shr(vx89AB, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 163 const v128_t vsignmCDEF = wasm_i32x4_shr(vxCDEF, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 165 const v128_t vsignmGHIJ = wasm_i32x4_shr(vxGHIJ, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 208 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 241 const v128_t vsignm = wasm_i32x4_shr(vx, 31); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
|
/external/XNNPACK/src/qs8-vadd/ |
D | wasmsimd.c.in | 51 … = wasm_i32x4_add(wasm_v128_and(vacc${ABC[N:N+4]}, vremainder_mask), wasm_i32x4_shr(vacc${ABC[N:N+… 54 …vacc${ABC[N:N+4]} = wasm_i32x4_sub(wasm_i32x4_shr(vacc${ABC[N:N+4]}, vshift), wasm_i32x4_gt(vrem${… 102 …]} = wasm_i32x4_add(wasm_v128_and(vacc${ABC[0:4]}, vremainder_mask), wasm_i32x4_shr(vacc${ABC[0:4]… 103 …]} = wasm_i32x4_add(wasm_v128_and(vacc${ABC[4:8]}, vremainder_mask), wasm_i32x4_shr(vacc${ABC[4:8]… 105 …vacc${ABC[0:4]} = wasm_i32x4_sub(wasm_i32x4_shr(vacc${ABC[0:4]}, vshift), wasm_i32x4_gt(vrem${ABC[… 106 …vacc${ABC[4:8]} = wasm_i32x4_sub(wasm_i32x4_shr(vacc${ABC[4:8]}, vshift), wasm_i32x4_gt(vrem${ABC[…
|