/external/XNNPACK/src/f32-spmm/gen/

D | 8x4-minmax-scalar.c
    in xnn_f32_spmm_minmax_ukernel_8x4__scalar():
       88  const float vw3 = *w++;   (local)
      113  vacc0x3 += vi0 * vw3;
      114  vacc1x3 += vi1 * vw3;
      115  vacc2x3 += vi2 * vw3;
      116  vacc3x3 += vi3 * vw3;
      117  vacc4x3 += vi4 * vw3;
      118  vacc5x3 += vi5 * vw3;
      119  vacc6x3 += vi6 * vw3;
      120  vacc7x3 += vi7 * vw3;
      335  const float vw3 = *w++;   (local)
      [all …]

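The scalar matches above all come from the same sparse-matmul inner loop: each nonzero weight is read once (`const float vw3 = *w++;`) and accumulated into one register per row (vacc0x3 … vacc7x3), and the finished accumulators are clamped to the min/max range. The sketch below is a simplified scalar reconstruction of that pattern, not XNNPACK's generated ukernel; the function name, the (nnz_per_oc, input_diffs) sparse layout, and the output layout are illustrative assumptions.

    #include <stddef.h>
    #include <stdint.h>

    /* Simplified sketch of the minmax SpMM accumulation pattern; the name and
     * the sparse layout (nnz_per_oc, input_diffs) are assumptions, not the
     * real xnn_f32_spmm_minmax_ukernel_*__scalar contract. Assumes rows <= 32. */
    static void spmm_minmax_ref(
        size_t rows,                   /* rows handled per call: 4, 8, 16, 32 */
        size_t output_channels,
        const float* input,            /* dense activations                   */
        const float* weights,          /* packed nonzero weights, one stream  */
        const int32_t* input_diffs,    /* per-nonzero hop (in floats)         */
        const uint32_t* nnz_per_oc,    /* nonzero count per output channel    */
        float* output,
        float output_min, float output_max)
    {
      for (size_t oc = 0; oc < output_channels; oc++) {
        const float* in = input;
        float acc[32] = {0.0f};        /* stands in for vacc0x3 ... vacc7x3   */
        for (uint32_t k = 0; k < nnz_per_oc[oc]; k++) {
          const float vw = *weights++;             /* const float vw3 = *w++; */
          for (size_t m = 0; m < rows; m++) {
            acc[m] += in[m] * vw;                  /* vacc0x3 += vi0 * vw3;   */
          }
          in += *input_diffs++;        /* jump to the next nonzero's input    */
        }
        for (size_t m = 0; m < rows; m++) {        /* the "minmax" clamping   */
          float v = acc[m];
          v = v < output_min ? output_min : v;
          v = v > output_max ? output_max : v;
          output[oc * rows + m] = v;
        }
      }
    }
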
D | 32x1-minmax-wasmsimd-arm-x4.c
    in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4():
      148  const v128_t vw3 = wasm_v32x4_load_splat(w);   (local)
      150  vacc0123x3 = wasm_f32x4_add(vacc0123x3, wasm_f32x4_mul(vi0123x3, vw3));
      151  vacc4567x3 = wasm_f32x4_add(vacc4567x3, wasm_f32x4_mul(vi4567x3, vw3));
      152  vacc89ABx3 = wasm_f32x4_add(vacc89ABx3, wasm_f32x4_mul(vi89ABx3, vw3));
      153  vaccCDEFx3 = wasm_f32x4_add(vaccCDEFx3, wasm_f32x4_mul(viCDEFx3, vw3));
      154  vaccGHIJx3 = wasm_f32x4_add(vaccGHIJx3, wasm_f32x4_mul(viGHIJx3, vw3));
      155  vaccKLMNx3 = wasm_f32x4_add(vaccKLMNx3, wasm_f32x4_mul(viKLMNx3, vw3));
      156  vaccOPQRx3 = wasm_f32x4_add(vaccOPQRx3, wasm_f32x4_mul(viOPQRx3, vw3));
      157  vaccSTUVx3 = wasm_f32x4_add(vaccSTUVx3, wasm_f32x4_mul(viSTUVx3, vw3));

D | 32x1-minmax-wasmsimd-x86-x4.c
    in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4():
      148  const v128_t vw3 = wasm_v32x4_load_splat(w);   (local)
      150  vacc0123x3 = wasm_f32x4_add(vacc0123x3, wasm_f32x4_mul(vi0123x3, vw3));
      151  vacc4567x3 = wasm_f32x4_add(vacc4567x3, wasm_f32x4_mul(vi4567x3, vw3));
      152  vacc89ABx3 = wasm_f32x4_add(vacc89ABx3, wasm_f32x4_mul(vi89ABx3, vw3));
      153  vaccCDEFx3 = wasm_f32x4_add(vaccCDEFx3, wasm_f32x4_mul(viCDEFx3, vw3));
      154  vaccGHIJx3 = wasm_f32x4_add(vaccGHIJx3, wasm_f32x4_mul(viGHIJx3, vw3));
      155  vaccKLMNx3 = wasm_f32x4_add(vaccKLMNx3, wasm_f32x4_mul(viKLMNx3, vw3));
      156  vaccOPQRx3 = wasm_f32x4_add(vaccOPQRx3, wasm_f32x4_mul(viOPQRx3, vw3));
      157  vaccSTUVx3 = wasm_f32x4_add(vaccSTUVx3, wasm_f32x4_mul(viSTUVx3, vw3));

D | 16x1-minmax-wasmsimd-x86-x4.c
    in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_x4():
      104  const v128_t vw3 = wasm_v32x4_load_splat(w);   (local)
      106  vacc0123x3 = wasm_f32x4_add(vacc0123x3, wasm_f32x4_mul(vi0123x3, vw3));
      107  vacc4567x3 = wasm_f32x4_add(vacc4567x3, wasm_f32x4_mul(vi4567x3, vw3));
      108  vacc89ABx3 = wasm_f32x4_add(vacc89ABx3, wasm_f32x4_mul(vi89ABx3, vw3));
      109  vaccCDEFx3 = wasm_f32x4_add(vaccCDEFx3, wasm_f32x4_mul(viCDEFx3, vw3));

D | 16x1-minmax-wasmsimd-arm-x4.c
    in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_x4():
      104  const v128_t vw3 = wasm_v32x4_load_splat(w);   (local)
      106  vacc0123x3 = wasm_f32x4_add(vacc0123x3, wasm_f32x4_mul(vi0123x3, vw3));
      107  vacc4567x3 = wasm_f32x4_add(vacc4567x3, wasm_f32x4_mul(vi4567x3, vw3));
      108  vacc89ABx3 = wasm_f32x4_add(vacc89ABx3, wasm_f32x4_mul(vi89ABx3, vw3));
      109  vaccCDEFx3 = wasm_f32x4_add(vaccCDEFx3, wasm_f32x4_mul(viCDEFx3, vw3));

D | 8x1-minmax-wasmsimd-arm-x4.c
    in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_x4():
       82  const v128_t vw3 = wasm_v32x4_load_splat(w);   (local)
       84  vacc0123x3 = wasm_f32x4_add(vacc0123x3, wasm_f32x4_mul(vi0123x3, vw3));
       85  vacc4567x3 = wasm_f32x4_add(vacc4567x3, wasm_f32x4_mul(vi4567x3, vw3));

D | 8x1-minmax-wasmsimd-x86-x4.c
    in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_x4():
       82  const v128_t vw3 = wasm_v32x4_load_splat(w);   (local)
       84  vacc0123x3 = wasm_f32x4_add(vacc0123x3, wasm_f32x4_mul(vi0123x3, vw3));
       85  vacc4567x3 = wasm_f32x4_add(vacc4567x3, wasm_f32x4_mul(vi4567x3, vw3));

D | 4x1-minmax-wasmsimd-arm-x4.c
    in xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_arm_x4():
       71  const v128_t vw3 = wasm_v32x4_load_splat(w);   (local)
       73  vacc0123x3 = wasm_f32x4_add(vacc0123x3, wasm_f32x4_mul(vi0123x3, vw3));

D | 4x1-minmax-wasmsimd-x86-x4.c
    in xnn_f32_spmm_minmax_ukernel_4x1__wasmsimd_x86_x4():
       71  const v128_t vw3 = wasm_v32x4_load_splat(w);   (local)
       73  vacc0123x3 = wasm_f32x4_add(vacc0123x3, wasm_f32x4_mul(vi0123x3, vw3));

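The wasmsimd variants vectorize the same loop four rows at a time: the scalar weight is broadcast to all four lanes (the listings use the older intrinsic name wasm_v32x4_load_splat; current wasm_simd128.h headers spell it wasm_v128_load32_splat) and multiplied into one v128_t accumulator per 4-row group, which is why the 8x1/16x1/32x1 kernels carry 2/4/8 accumulators (vacc0123x3, vacc4567x3, …). The -arm and -x86 file pairs look identical here because, as far as these matches reach, they differ only in how the final min/max clamp is lowered for the respective engines. A minimal sketch of one such step, with an assumed helper name:

    #include <stddef.h>
    #include <wasm_simd128.h>

    /* Illustrative helper (not generated XNNPACK code) showing one SpMM step:
     * broadcast one nonzero weight, then multiply-accumulate 4 rows per vector.
     * wasm_v128_load32_splat is the current name for wasm_v32x4_load_splat.
     * Assumes rows is a multiple of 4; build with a wasm SIMD toolchain. */
    static void spmm_step_wasmsimd(
        size_t rows,              /* 8, 16, or 32 in the kernels above        */
        const float* input,       /* `rows` contiguous activations            */
        const float* w,           /* points at the current nonzero weight     */
        v128_t* acc)              /* rows/4 accumulators (vacc0123x3, ...)    */
    {
      const v128_t vw = wasm_v128_load32_splat(w);   /* splat the weight      */
      for (size_t m = 0; m < rows; m += 4) {
        const v128_t vi = wasm_v128_load(input + m);
        /* vacc0123x3 = wasm_f32x4_add(vacc0123x3, wasm_f32x4_mul(vi0123x3, vw3)); */
        acc[m / 4] = wasm_f32x4_add(acc[m / 4], wasm_f32x4_mul(vi, vw));
      }
    }
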
/external/XNNPACK/src/f32-prelu/gen/

D | scalar-2x4.c
    in xnn_f32_prelu_ukernel__scalar_2x4():
       48  const float vw3 = w[3];   (local)
       64  const float vacc0x3 = XNN_UNPREDICTABLE(vi0x3 < 0.0f) ? vi0x3 * vw3 : vi0x3;
       68  const float vacc1x3 = XNN_UNPREDICTABLE(vi1x3 < 0.0f) ? vi1x3 * vw3 : vi1x3;

D | wasm-2x4.c
    in xnn_f32_prelu_ukernel__wasm_2x4():
       49  const float vw3 = w[3];   (local)
       82  vacc0x3 += vi0x3 * vw3;
       86  vacc1x3 += vi1x3 * vw3;

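Both PRELU kernels load the per-channel slope once (`const float vw3 = w[3];`) and apply it only to negative inputs: scalar-2x4.c uses an XNN_UNPREDICTABLE-hinted ternary, while the `vacc0x3 += vi0x3 * vw3;` lines in wasm-2x4.c are consistent with the branchless max(x, 0) + min(x, 0) * w decomposition. A minimal scalar sketch of both forms, with an assumed function name and layout:

    #include <stddef.h>

    /* Illustrative PReLU reference (assumed name and layout, not the XNNPACK
     * ukernel contract): y = x for x >= 0, y = slope[c] * x for x < 0.      */
    static void prelu_ref(size_t n, size_t channels,
                          const float* input, const float* slopes, float* output)
    {
      for (size_t i = 0; i < n; i++) {
        for (size_t c = 0; c < channels; c++) {
          const float x = input[i * channels + c];
          const float vw = slopes[c];               /* e.g. vw3 = w[3]        */
          /* Branchy form (scalar-2x4.c):  x < 0 ? x * vw : x                 */
          /* Branchless form (wasm-2x4.c): max(x, 0) + min(x, 0) * vw         */
          float acc = x > 0.0f ? x : 0.0f;          /* max(x, 0)              */
          acc += (x < 0.0f ? x : 0.0f) * vw;        /* += min(x, 0) * vw      */
          output[i * channels + c] = acc;
        }
      }
    }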