/external/XNNPACK/src/f32-vmulcaddc/gen/

c8-minmax-wasmsimd-x86-2x.c  (matches in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x)
     90: v128_t vacc0 = wasm_v128_load(i0);  (local)
     97: vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0));
    100: vacc0 = wasm_v128_bitselect(vmin, vacc0, wasm_f32x4_lt(vacc0, vmin));
    103: vacc0 = wasm_v128_bitselect(vacc0, vmax, wasm_f32x4_le(vacc0, vmax));
    106: wasm_v128_store(o0, vacc0);
    116: v128_t vacc0 = wasm_v128_load(i0);  (local)
    123: vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0));
    126: vacc0 = wasm_v128_bitselect(vmin, vacc0, wasm_f32x4_lt(vacc0, vmin));
    129: vacc0 = wasm_v128_bitselect(vacc0, vmax, wasm_f32x4_le(vacc0, vmax));
    133: *((double*) o0) = wasm_f64x2_extract_lane(vacc0, 0);
    [all …]

c8-minmax-wasmsimd-arm-2x.c  (matches in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x)
     90: v128_t vacc0 = wasm_v128_load(i0);  (local)
     97: vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0));
    100: vacc0 = wasm_f32x4_max(vacc0, vmin);
    103: vacc0 = wasm_f32x4_min(vacc0, vmax);
    106: wasm_v128_store(o0, vacc0);
    116: v128_t vacc0 = wasm_v128_load(i0);  (local)
    123: vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0));
    126: vacc0 = wasm_f32x4_max(vacc0, vmin);
    129: vacc0 = wasm_f32x4_min(vacc0, vmax);
    133: *((double*) o0) = wasm_f64x2_extract_lane(vacc0, 0);
    [all …]

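The -x86 and -arm variants of these kernels compute the same result and differ only in the clamp idiom: the arm files use wasm_f32x4_max/wasm_f32x4_min directly, while the x86 files build the clamp from comparisons plus wasm_v128_bitselect, presumably because WebAssembly's NaN-propagating f32x4.min/max lower to longer instruction sequences on x86. A minimal scalar sketch (helper names are hypothetical) showing the two formulations agree on ordinary, non-NaN inputs:

    #include <assert.h>
    #include <math.h>

    /* arm-style clamp: plain min/max. */
    static float clamp_minmax(float x, float lo, float hi) {
      x = fmaxf(x, lo);
      return fminf(x, hi);
    }

    /* x86-style clamp: mirrors bitselect(vmin, vacc0, vacc0 < vmin)
       followed by bitselect(vacc0, vmax, vacc0 <= vmax); bitselect(a, b, m)
       takes bits from a where the mask is set, from b elsewhere. */
    static float clamp_select(float x, float lo, float hi) {
      x = (x < lo) ? lo : x;      /* select lo where x < lo */
      return (x <= hi) ? x : hi;  /* select hi where x > hi */
    }

    int main(void) {
      for (float x = -10.0f; x <= 10.0f; x += 0.25f) {
        assert(clamp_minmax(x, -6.0f, 6.0f) == clamp_select(x, -6.0f, 6.0f));
      }
      return 0;
    }
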
c4-minmax-wasmsimd-x86-2x.c  (matches in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x)
     78: v128_t vacc0 = wasm_v128_load(i0);  (local)
     85: vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0));
     88: vacc0 = wasm_v128_bitselect(vmin, vacc0, wasm_f32x4_lt(vacc0, vmin));
     91: vacc0 = wasm_v128_bitselect(vacc0, vmax, wasm_f32x4_le(vacc0, vmax));
     95: *((double*) o0) = wasm_f64x2_extract_lane(vacc0, 0);
     98: vacc0 = wasm_v32x4_shuffle(vacc0, vacc0, 2, 3, 2, 3);
    105: *o0++ = wasm_f32x4_extract_lane(vacc0, 0);

c4-minmax-wasmsimd-arm-2x.c  (matches in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x)
     78: v128_t vacc0 = wasm_v128_load(i0);  (local)
     85: vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0));
     88: vacc0 = wasm_f32x4_max(vacc0, vmin);
     91: vacc0 = wasm_f32x4_min(vacc0, vmax);
     95: *((double*) o0) = wasm_f64x2_extract_lane(vacc0, 0);
     98: vacc0 = wasm_v32x4_shuffle(vacc0, vacc0, 2, 3, 2, 3);
    105: *o0++ = wasm_f32x4_extract_lane(vacc0, 0);

c1-minmax-scalar-2x.c  (matches in xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x)
     50: float vacc0 = *i0++;  (local)
     55: vacc0 = vacc0 * vscale + vbias;
     58: vacc0 = math_max_f32(vacc0, vmin);
     61: vacc0 = math_min_f32(vacc0, vmax);
     64: *o0++ = vacc0;

c1-minmax-wasm-2x.c  (matches in xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x)
     50: float vacc0 = *i0++;  (local)
     55: vacc0 = vacc0 * vscale + vbias;
     58: vacc0 = __builtin_wasm_max_f32(vacc0, vmin);
     61: vacc0 = __builtin_wasm_min_f32(vacc0, vmax);
     64: *o0++ = vacc0;

c2-minmax-wasm-2x.c  (matches in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x)
     89: float vacc0 = *i0++;  (local)
     94: vacc0 = vacc0 * vscale + vbias;
     97: vacc0 = __builtin_wasm_max_f32(vacc0, vmin);
    100: vacc0 = __builtin_wasm_min_f32(vacc0, vmax);
    103: *o0++ = vacc0;

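All seven files in this directory implement the same per-element operation at different channel tile widths (c1/c2/c4/c8), processing two rows at a time (the 2x suffix): multiply by a per-channel scale, add a per-channel bias, and clamp. A scalar reference sketch of that semantics; the names are illustrative, not XNNPACK's API:

    #include <stddef.h>

    /* Reference semantics sketch for the f32-vmulcaddc-minmax kernels:
       y[c] = clamp(x[c] * scale[c] + bias[c], vmin, vmax). */
    static void vmulcaddc_minmax_ref(size_t channels,
                                     const float* x, const float* scale,
                                     const float* bias, float vmin, float vmax,
                                     float* y) {
      for (size_t c = 0; c < channels; c++) {
        float acc = x[c] * scale[c] + bias[c];
        acc = acc < vmin ? vmin : acc;  /* clamp below */
        acc = acc > vmax ? vmax : acc;  /* clamp above */
        y[c] = acc;
      }
    }
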
/external/XNNPACK/src/f32-raddexpminusmax/gen/

avx512f-p5-scalef-x192.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192)
     39: __m512 vacc0 = _mm512_setzero_ps();  (local)
    195: vacc0 = _mm512_add_ps(vacc0, vf0);
    196: vacc0 = _mm512_add_ps(vacc0, vf1);
    197: vacc0 = _mm512_add_ps(vacc0, vf2);
    198: vacc0 = _mm512_add_ps(vacc0, vf3);
    199: vacc0 = _mm512_add_ps(vacc0, vf4);
    200: vacc0 = _mm512_add_ps(vacc0, vf5);
    201: vacc0 = _mm512_add_ps(vacc0, vf6);
    202: vacc0 = _mm512_add_ps(vacc0, vf7);
    203: vacc0 = _mm512_add_ps(vacc0, vf8);
    [all …]

avx512f-p5-scalef-x160.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160)
     39: __m512 vacc0 = _mm512_setzero_ps();  (local)
    173: vacc0 = _mm512_add_ps(vacc0, vf0);
    174: vacc0 = _mm512_add_ps(vacc0, vf1);
    175: vacc0 = _mm512_add_ps(vacc0, vf2);
    176: vacc0 = _mm512_add_ps(vacc0, vf3);
    177: vacc0 = _mm512_add_ps(vacc0, vf4);
    178: vacc0 = _mm512_add_ps(vacc0, vf5);
    179: vacc0 = _mm512_add_ps(vacc0, vf6);
    180: vacc0 = _mm512_add_ps(vacc0, vf7);
    181: vacc0 = _mm512_add_ps(vacc0, vf8);
    [all …]

avx512f-p5-scalef-x144.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144)
     39: __m512 vacc0 = _mm512_setzero_ps();  (local)
    162: vacc0 = _mm512_add_ps(vacc0, vf0);
    163: vacc0 = _mm512_add_ps(vacc0, vf1);
    164: vacc0 = _mm512_add_ps(vacc0, vf2);
    165: vacc0 = _mm512_add_ps(vacc0, vf3);
    166: vacc0 = _mm512_add_ps(vacc0, vf4);
    167: vacc0 = _mm512_add_ps(vacc0, vf5);
    168: vacc0 = _mm512_add_ps(vacc0, vf6);
    169: vacc0 = _mm512_add_ps(vacc0, vf7);
    170: vacc0 = _mm512_add_ps(vacc0, vf8);
    [all …]

avx512f-p5-scalef-x128.c  (matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128)
     39: __m512 vacc0 = _mm512_setzero_ps();  (local)
    151: vacc0 = _mm512_add_ps(vacc0, vf0);
    152: vacc0 = _mm512_add_ps(vacc0, vf1);
    153: vacc0 = _mm512_add_ps(vacc0, vf2);
    154: vacc0 = _mm512_add_ps(vacc0, vf3);
    155: vacc0 = _mm512_add_ps(vacc0, vf4);
    156: vacc0 = _mm512_add_ps(vacc0, vf5);
    157: vacc0 = _mm512_add_ps(vacc0, vf6);
    158: vacc0 = _mm512_add_ps(vacc0, vf7);
    161: __m512 vacc = vacc0;

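Each of these kernels accumulates exp(x[i] - max) over the whole input, i.e. the denominator of a numerically stable softmax; vacc0 above is the first of several parallel accumulators, and p5/scalef refer to the degree-5 polynomial exp approximation reconstructed with _mm512_scalef_ps. A scalar sketch of the semantics, with libm's expf standing in for the polynomial:

    #include <math.h>
    #include <stddef.h>

    /* Scalar sketch of raddexpminusmax: return sum of exp(x[i] - max_value).
       expf here is only illustrative; the kernels use a vectorized
       polynomial approximation instead. */
    static float raddexpminusmax_ref(size_t n, const float* x, float max_value) {
      float sum = 0.0f;
      for (size_t i = 0; i < n; i++) {
        sum += expf(x[i] - max_value);
      }
      return sum;
    }
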
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

avx512f-p5-scalef-x192.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192)
     40: __m512 vacc0 = _mm512_setzero_ps();  (local)
    211: vacc0 = _mm512_add_ps(vacc0, vf0);
    212: vacc0 = _mm512_add_ps(vacc0, vf1);
    213: vacc0 = _mm512_add_ps(vacc0, vf2);
    214: vacc0 = _mm512_add_ps(vacc0, vf3);
    215: vacc0 = _mm512_add_ps(vacc0, vf4);
    216: vacc0 = _mm512_add_ps(vacc0, vf5);
    217: vacc0 = _mm512_add_ps(vacc0, vf6);
    218: vacc0 = _mm512_add_ps(vacc0, vf7);
    219: vacc0 = _mm512_add_ps(vacc0, vf8);
    [all …]

avx512f-p5-scalef-x160.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160)
     40: __m512 vacc0 = _mm512_setzero_ps();  (local)
    187: vacc0 = _mm512_add_ps(vacc0, vf0);
    188: vacc0 = _mm512_add_ps(vacc0, vf1);
    189: vacc0 = _mm512_add_ps(vacc0, vf2);
    190: vacc0 = _mm512_add_ps(vacc0, vf3);
    191: vacc0 = _mm512_add_ps(vacc0, vf4);
    192: vacc0 = _mm512_add_ps(vacc0, vf5);
    193: vacc0 = _mm512_add_ps(vacc0, vf6);
    194: vacc0 = _mm512_add_ps(vacc0, vf7);
    195: vacc0 = _mm512_add_ps(vacc0, vf8);
    [all …]

avx512f-p5-scalef-x144.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144)
     40: __m512 vacc0 = _mm512_setzero_ps();  (local)
    175: vacc0 = _mm512_add_ps(vacc0, vf0);
    176: vacc0 = _mm512_add_ps(vacc0, vf1);
    177: vacc0 = _mm512_add_ps(vacc0, vf2);
    178: vacc0 = _mm512_add_ps(vacc0, vf3);
    179: vacc0 = _mm512_add_ps(vacc0, vf4);
    180: vacc0 = _mm512_add_ps(vacc0, vf5);
    181: vacc0 = _mm512_add_ps(vacc0, vf6);
    182: vacc0 = _mm512_add_ps(vacc0, vf7);
    183: vacc0 = _mm512_add_ps(vacc0, vf8);
    [all …]

wasmsimd-p5-x16-acc4.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x16_acc4)
     43: v128_t vacc0 = wasm_f32x4_splat(0.0f);  (local)
     44: v128_t vacc1 = vacc0;
     45: v128_t vacc2 = vacc0;
     46: v128_t vacc3 = vacc0;
    142: vacc0 = wasm_f32x4_add(vacc0, vf0123);
    143: vacc0 = wasm_f32x4_add(vacc0, vf4567);
    144: vacc0 = wasm_f32x4_add(vacc0, vf89AB);
    145: vacc0 = wasm_f32x4_add(vacc0, vfCDEF);
    148: vacc0 = wasm_f32x4_add(vacc0, vacc1);
    150: vacc0 = wasm_f32x4_add(vacc0, vacc2);
    [all …]

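The acc4 suffix means the running sum is split across four accumulators (lines 43-46 above) so the adds on lines 142-145 do not serialize into one long dependency chain; lines 148 and 150 then fold the accumulators together, presumably via a small reduction tree (a vacc2 += vacc3 step between them would not mention vacc0 and so would not be matched). The same pattern in scalar form:

    #include <stddef.h>

    /* Sketch of the acc4 reduction: four independent partial sums hide
       floating-point add latency; a short tree combines them at the end. */
    static float sum_acc4(size_t n, const float* x) {
      float acc0 = 0.0f, acc1 = 0.0f, acc2 = 0.0f, acc3 = 0.0f;
      size_t i = 0;
      for (; i + 4 <= n; i += 4) {
        acc0 += x[i + 0];
        acc1 += x[i + 1];
        acc2 += x[i + 2];
        acc3 += x[i + 3];
      }
      acc0 += acc1;                     /* mirrors line 148 */
      acc2 += acc3;                     /* the step not matched by the search */
      acc0 += acc2;                     /* mirrors line 150 */
      for (; i < n; i++) acc0 += x[i];  /* remainder elements */
      return acc0;
    }
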
avx512f-p5-scalef-x128.c  (matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128)
     40: __m512 vacc0 = _mm512_setzero_ps();  (local)
    163: vacc0 = _mm512_add_ps(vacc0, vf0);
    164: vacc0 = _mm512_add_ps(vacc0, vf1);
    165: vacc0 = _mm512_add_ps(vacc0, vf2);
    166: vacc0 = _mm512_add_ps(vacc0, vf3);
    167: vacc0 = _mm512_add_ps(vacc0, vf4);
    168: vacc0 = _mm512_add_ps(vacc0, vf5);
    169: vacc0 = _mm512_add_ps(vacc0, vf6);
    170: vacc0 = _mm512_add_ps(vacc0, vf7);
    173: __m512 vacc = vacc0;

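These kernels match their f32-raddexpminusmax counterparts above almost line for line; the difference (on lines not matched here) is that each exp(x - max) is also stored to the output while being accumulated, giving a softmax both its numerator values and its denominator in one pass. A scalar sketch:

    #include <math.h>
    #include <stddef.h>

    /* Sketch of raddstoreexpminusmax: store each exp(x[i] - max_value)
       and return their sum. expf stands in for the kernels' polynomial
       exp approximation. */
    static float raddstoreexpminusmax_ref(size_t n, const float* x,
                                          float max_value, float* y) {
      float sum = 0.0f;
      for (size_t i = 0; i < n; i++) {
        const float e = expf(x[i] - max_value);
        y[i] = e;
        sum += e;
      }
      return sum;
    }
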
/external/XNNPACK/src/f32-spmm/gen/

8x1-minmax-scalar.c  (matches in xnn_f32_spmm_minmax_ukernel_8x1__scalar)
    110: float vacc0 = *w++;  (local)
    111: float vacc1 = vacc0;
    112: float vacc2 = vacc0;
    113: float vacc3 = vacc0;
    114: float vacc4 = vacc0;
    115: float vacc5 = vacc0;
    116: float vacc6 = vacc0;
    117: float vacc7 = vacc0;
    131: vacc0 += vi0 * vw;
    141: float vout0 = math_min_f32(vacc0, vmax);
    [all …]

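In this sparse kernel the bias *w++ is broadcast into eight accumulators, one per element of the 8-wide output tile (lines 110-117); each nonzero weight is then multiplied against eight input values (line 131 shows the first), and the results are clamped on the way out (line 141). A scalar sketch of that accumulation pattern; the function name and the flat weight/offset layout here are illustrative only, not XNNPACK's actual sparse format:

    #include <stddef.h>

    /* Sketch of the 8x1 spmm pattern for one output channel: seed 8
       accumulators with the bias, then for each nonzero weight accumulate
       the 8 corresponding input elements, and clamp on output. */
    static void spmm_8x1_channel_ref(size_t nnz,
                                     const float* weights,       /* bias, then nnz weights */
                                     const size_t* input_offsets, /* one per nonzero */
                                     const float* input,
                                     float vmin, float vmax, float out[8]) {
      const float bias = *weights++;
      float acc[8];
      for (size_t j = 0; j < 8; j++) acc[j] = bias;  /* mirrors vacc1..vacc7 = vacc0 */
      for (size_t k = 0; k < nnz; k++) {
        const float vw = weights[k];
        const float* in = input + input_offsets[k];
        for (size_t j = 0; j < 8; j++) acc[j] += in[j] * vw;  /* mirrors vacc0 += vi0 * vw */
      }
      for (size_t j = 0; j < 8; j++) {
        float v = acc[j] > vmax ? vmax : acc[j];  /* mirrors math_min_f32(vacc0, vmax) */
        out[j] = v < vmin ? vmin : v;
      }
    }
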
/external/XNNPACK/src/f32-hswish/gen/

hswish-wasm-x2.c  (matches in xnn_f32_hswish_ukernel__wasm_x2)
     38: float vacc0 = vx0 + vthree;  (local)
     43: vacc0 = __builtin_wasm_max_f32(vacc0, vzero);
     46: vacc0 = __builtin_wasm_min_f32(vacc0, vsix);
     49: vacc0 *= vx0;
     52: y[0] = vacc0;

hswish-scalar-x2.c  (matches in xnn_f32_hswish_ukernel__scalar_x2)
     38: float vacc0 = vx0 + vthree;  (local)
     43: vacc0 = math_max_f32(vacc0, vzero);
     46: vacc0 = math_min_f32(vacc0, vsix);
     49: vacc0 *= vx0;
     52: y[0] = vacc0;

hswish-wasm-x4.c  (matches in xnn_f32_hswish_ukernel__wasm_x4)
     40: float vacc0 = vx0 + vthree;  (local)
     49: vacc0 = __builtin_wasm_max_f32(vacc0, vzero);
     54: vacc0 = __builtin_wasm_min_f32(vacc0, vsix);
     59: vacc0 *= vx0;
     64: y[0] = vacc0;

hswish-scalar-x4.c  (matches in xnn_f32_hswish_ukernel__scalar_x4)
     40: float vacc0 = vx0 + vthree;  (local)
     49: vacc0 = math_max_f32(vacc0, vzero);
     54: vacc0 = math_min_f32(vacc0, vsix);
     59: vacc0 *= vx0;
     64: y[0] = vacc0;

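All four files compute hswish(x) = x * max(0, min(6, x + 3)) / 6: the matched lines show the x + 3, the clamp to [0, 6], and the final multiply by vx0. The 1/6 factor does not appear in the matched lines; in kernels of this shape it is typically folded into vx0 as a multiply by one sixth before line 49, which the sketch below makes explicit:

    #include <math.h>

    /* Scalar sketch of the hswish computation: x * relu6(x + 3) / 6. */
    static float hswish_ref(float x) {
      float acc = x + 3.0f;            /* mirrors vacc0 = vx0 + vthree */
      acc = fmaxf(acc, 0.0f);          /* clamp against vzero */
      acc = fminf(acc, 6.0f);          /* clamp against vsix  */
      return acc * x * (1.0f / 6.0f);  /* the 1/6 scaling happens on lines
                                          not shown in the listing above */
    }
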
/external/XNNPACK/src/f32-clamp/gen/

wasm-x2.c  (matches in xnn_f32_clamp_ukernel__wasm_x2)
     32: float vacc0 = x[0];  (local)
     36: vacc0 = __builtin_wasm_max_f32(vacc0, vy_min);
     39: vacc0 = __builtin_wasm_min_f32(vacc0, vy_max);
     42: y[0] = vacc0;

scalar-x2.c  (matches in xnn_f32_clamp_ukernel__scalar_x2)
     32: float vacc0 = x[0];  (local)
     36: vacc0 = math_max_f32(vacc0, vy_min);
     39: vacc0 = math_min_f32(vacc0, vy_max);
     42: y[0] = vacc0;

scalar-x4.c  (matches in xnn_f32_clamp_ukernel__scalar_x4)
     32: float vacc0 = x[0];  (local)
     38: vacc0 = math_max_f32(vacc0, vy_min);
     43: vacc0 = math_min_f32(vacc0, vy_max);
     48: y[0] = vacc0;

wasm-x4.c  (matches in xnn_f32_clamp_ukernel__wasm_x4)
     32: float vacc0 = x[0];  (local)
     38: vacc0 = __builtin_wasm_max_f32(vacc0, vy_min);
     43: vacc0 = __builtin_wasm_min_f32(vacc0, vy_max);
     48: y[0] = vacc0;

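The clamp kernels simply load, clamp to [vy_min, vy_max], and store; the wasm variants use the clang __builtin_wasm_max_f32/__builtin_wasm_min_f32 builtins, while the scalar ones use XNNPACK's math_max_f32/math_min_f32 helpers. A per-element sketch:

    /* Sketch of the per-element clamp these kernels apply. */
    static float clamp_ref(float x, float y_min, float y_max) {
      x = x < y_min ? y_min : x;     /* mirrors max(vacc0, vy_min) */
      return x > y_max ? y_max : x;  /* mirrors min(vacc0, vy_max) */
    }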