/external/XNNPACK/src/f32-velu/gen/

  velu-wasm-rr2-p6-x2.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x2()
      66  float vp0 = vc6 * vt0 + vc5;   (local)
      69  vp0 = vp0 * vt0 + vc4;
      72  vp0 = vp0 * vt0 + vc3;
      75  vp0 = vp0 * vt0 + vc2;
      78  vp0 *= vt0;
      86  vp0 = vp0 * vt0 + vt0;
      89  const float ve0 = (vp0 + vs0) * valpha;

  velu-scalar-rr2-p6-x2.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x2()
      74  float vp0 = vc6 * vt0 + vc5;   (local)
      77  vp0 = vp0 * vt0 + vc4;
      80  vp0 = vp0 * vt0 + vc3;
      83  vp0 = vp0 * vt0 + vc2;
      86  vp0 *= vt0;
      94  vp0 = vp0 * vt0 + vt0;
      97  const float ve0 = (vp0 + vs0) * valpha;

  velu-scalar-rr2-p6-x3.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x3()
      85  float vp0 = vc6 * vt0 + vc5;   (local)
      89  vp0 = vp0 * vt0 + vc4;
      93  vp0 = vp0 * vt0 + vc3;
      97  vp0 = vp0 * vt0 + vc2;
     101  vp0 *= vt0;
     112  vp0 = vp0 * vt0 + vt0;
     116  const float ve0 = (vp0 + vs0) * valpha;

  velu-wasm-rr2-p6-x3.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x3()
      73  float vp0 = vc6 * vt0 + vc5;   (local)
      77  vp0 = vp0 * vt0 + vc4;
      81  vp0 = vp0 * vt0 + vc3;
      85  vp0 = vp0 * vt0 + vc2;
      89  vp0 *= vt0;
     100  vp0 = vp0 * vt0 + vt0;
     104  const float ve0 = (vp0 + vs0) * valpha;

  velu-wasm-rr2-p6-x4.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
      80  float vp0 = vc6 * vt0 + vc5;   (local)
      85  vp0 = vp0 * vt0 + vc4;
      90  vp0 = vp0 * vt0 + vc3;
      95  vp0 = vp0 * vt0 + vc2;
     100  vp0 *= vt0;
     114  vp0 = vp0 * vt0 + vt0;
     119  const float ve0 = (vp0 + vs0) * valpha;

  velu-scalar-rr2-p6-x4.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
      96  float vp0 = vc6 * vt0 + vc5;   (local)
     101  vp0 = vp0 * vt0 + vc4;
     106  vp0 = vp0 * vt0 + vc3;
     111  vp0 = vp0 * vt0 + vc2;
     116  vp0 *= vt0;
     130  vp0 = vp0 * vt0 + vt0;
     135  const float ve0 = (vp0 + vs0) * valpha;

  velu-avx2-rr1-p6-x16.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x16()
      61  __m256 vp0 = _mm256_fmadd_ps(vc6, vt0, vc5);   (local)
      64  vp0 = _mm256_fmadd_ps(vp0, vt0, vc4);
      67  vp0 = _mm256_fmadd_ps(vp0, vt0, vc3);
      70  vp0 = _mm256_fmadd_ps(vp0, vt0, vc2);
      73  vp0 = _mm256_mul_ps(vp0, vt0);
      79  vp0 = _mm256_fmadd_ps(vp0, vt0, vt0);
      83  const __m256 ve0 = _mm256_fmadd_ps(vp0, valpha, vs0);

  velu-avx512f-rr1-p6-x32.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32()
      61  __m512 vp0 = _mm512_fmadd_ps(vc6, vt0, vc5);   (local)
      64  vp0 = _mm512_fmadd_ps(vp0, vt0, vc4);
      67  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
      70  vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
      73  vp0 = _mm512_mul_ps(vp0, vt0);
      81  vp0 = _mm512_fmadd_ps(vp0, vt0, vt0);
      85  __m512 vy0 = _mm512_fmadd_ps(vp0, valpha, vs0);

  velu-wasm-rr2-p6-x5.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
      87  float vp0 = vc6 * vt0 + vc5;   (local)
      93  vp0 = vp0 * vt0 + vc4;
      99  vp0 = vp0 * vt0 + vc3;
     105  vp0 = vp0 * vt0 + vc2;
     111  vp0 *= vt0;
     128  vp0 = vp0 * vt0 + vt0;
     134  const float ve0 = (vp0 + vs0) * valpha;

  velu-scalar-rr2-p6-x5.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
     107  float vp0 = vc6 * vt0 + vc5;   (local)
     113  vp0 = vp0 * vt0 + vc4;
     119  vp0 = vp0 * vt0 + vc3;
     125  vp0 = vp0 * vt0 + vc2;
     131  vp0 *= vt0;
     148  vp0 = vp0 * vt0 + vt0;
     154  const float ve0 = (vp0 + vs0) * valpha;
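Every file in this group follows the same shape: a degree-6 polynomial in the reduced argument vt0 evaluated by Horner's rule, then combined with vs0 and valpha for the ELU negative branch; only the source line numbers and the scalar/SIMD spelling differ. The FMA variants fold the multiply by valpha and the add of vs0 into a single _mm256_fmadd_ps / _mm512_fmadd_ps on the last row. A minimal scalar sketch of the pattern is below; the coefficient values and the inputs t, s, and alpha are hypothetical placeholders, not the constants the XNNPACK kernels actually load.

    #include <stdio.h>

    /* Sketch of the vp0 pattern listed above: Horner evaluation of a
     * degree-6 polynomial in t, followed by the "(vp0 + vs0) * valpha"
     * combine step.  All constants here are placeholders. */
    static float velu_p6_sketch(float t, float s, float alpha) {
      const float c6 = 0.0014f, c5 = 0.0083f, c4 = 0.0417f,
                  c3 = 0.1667f, c2 = 0.5f;
      float p = c6 * t + c5;   /* "float vp0 = vc6 * vt0 + vc5" */
      p = p * t + c4;
      p = p * t + c3;
      p = p * t + c2;
      p *= t;                  /* "vp0 *= vt0" */
      p = p * t + t;           /* "vp0 = vp0 * vt0 + vt0" */
      return (p + s) * alpha;  /* "const float ve0 = (vp0 + vs0) * valpha" */
    }

    int main(void) {
      /* Usage example with made-up inputs. */
      printf("%f\n", velu_p6_sketch(-0.25f, 0.75f, 1.0f));
      return 0;
    }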
/external/XNNPACK/src/f32-vscaleextexp/gen/

  avx512f-p5-scalef-x16.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16()
      57  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);   (local)
      59  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
      61  vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
      63  vp0 = _mm512_fmadd_ps(vp0, vt0, vc1);
      65  vp0 = _mm512_fmadd_ps(vp0, vt0, vc0);
      73  __m512 vf0 = _mm512_mul_ps(vp0, vscalev);

  avx2-p5-x8.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8()
      63  __m256 vp0 = _mm256_fmadd_ps(vc5, vt0, vc4);   (local)
      65  vp0 = _mm256_fmadd_ps(vp0, vt0, vc3);
      67  vp0 = _mm256_fmadd_ps(vp0, vt0, vc2);
      69  vp0 = _mm256_fmadd_ps(vp0, vt0, vc1);
      71  vp0 = _mm256_fmadd_ps(vp0, vt0, vc0);
      79  __m256 vf0 = _mm256_mul_ps(vp0, vscalev);

  avx512f-p5-scalef-x32.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32()
      61  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);   (local)
      64  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
      67  vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
      70  vp0 = _mm512_fmadd_ps(vp0, vt0, vc1);
      73  vp0 = _mm512_fmadd_ps(vp0, vt0, vc0);
      82  __m512 vf0 = _mm512_mul_ps(vp0, vscalev);

  avx512f-p5-scalef-x48.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x48()
      65  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);   (local)
      69  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
      73  vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
      77  vp0 = _mm512_fmadd_ps(vp0, vt0, vc1);
      81  vp0 = _mm512_fmadd_ps(vp0, vt0, vc0);
      91  __m512 vf0 = _mm512_mul_ps(vp0, vscalev);

  avx2-p5-x16.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x16()
      67  __m256 vp0 = _mm256_fmadd_ps(vc5, vt0, vc4);   (local)
      70  vp0 = _mm256_fmadd_ps(vp0, vt0, vc3);
      73  vp0 = _mm256_fmadd_ps(vp0, vt0, vc2);
      76  vp0 = _mm256_fmadd_ps(vp0, vt0, vc1);
      79  vp0 = _mm256_fmadd_ps(vp0, vt0, vc0);
      88  __m256 vf0 = _mm256_mul_ps(vp0, vscalev);
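In this group the polynomial is degree 5 and runs down to a constant term vc0, and the result is then multiplied by vscalev. The rows stop at vf0, so the handling of the extended exponent (the "extexp" part of the kernel names) is not visible here. A scalar sketch of just the visible pattern, with placeholder coefficients:

    /* Sketch of the degree-5 Horner chain listed above, ending in the
     * multiply by a scale value.  Coefficients are placeholders. */
    float vscaleextexp_p5_sketch(float t, float scale) {
      const float c5 = 0.0083f, c4 = 0.0417f, c3 = 0.1667f,
                  c2 = 0.5f, c1 = 1.0f, c0 = 1.0f;
      float p = c5 * t + c4;   /* "__m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4)" */
      p = p * t + c3;
      p = p * t + c2;
      p = p * t + c1;
      p = p * t + c0;          /* constant term included, unlike the ELU kernels */
      return p * scale;        /* "vf0 = _mm512_mul_ps(vp0, vscalev)" */
    }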
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

  avx512f-p5-scalef-x16.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16()
      59  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);   (local)
      61  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
      63  vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
      65  vp0 = _mm512_fmadd_ps(vp0, vt0, vc1);
      67  vp0 = _mm512_fmadd_ps(vp0, vt0, vc0);
      72  __m512 vf0 = _mm512_scalef_ps(vp0, vn0);

  avx512f-p5-scalef-x32.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32()
      64  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);   (local)
      67  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
      70  vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
      73  vp0 = _mm512_fmadd_ps(vp0, vt0, vc1);
      76  vp0 = _mm512_fmadd_ps(vp0, vt0, vc0);
      82  __m512 vf0 = _mm512_scalef_ps(vp0, vn0);

  avx512f-p5-scalef-x48.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x48()
      69  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);   (local)
      73  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
      77  vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
      81  vp0 = _mm512_fmadd_ps(vp0, vt0, vc1);
      85  vp0 = _mm512_fmadd_ps(vp0, vt0, vc0);
      92  __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
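The Horner core here is the same degree-5 chain as in the previous group; what changes is the reconstruction step: _mm512_scalef_ps(vp0, vn0) scales each lane of vp0 by 2^vn0 in one instruction. A scalar stand-in for that single step, assuming n is the integer exponent produced by the (not shown) range reduction, is ldexpf:

    #include <math.h>

    /* Scalar stand-in for "_mm512_scalef_ps(vp0, vn0)": multiply the
     * polynomial value by 2^n.  n is assumed to come from the kernel's
     * range reduction, which is outside the listed rows. */
    float scalef_sketch(float p, int n) {
      return ldexpf(p, n);   /* p * 2^n */
    }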
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

  scalar-p5-x2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
      78  float vp0 = vc5 * vt0 + vc4;   (local)
      81  vp0 = vp0 * vt0 + vc3;
      84  vp0 = vp0 * vt0 + vc2;
      87  vp0 = vp0 * vt0 + vc1;
      97  float vf0 = vt0 * vp0 + vs0;

  scalar-p5-x2-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
      79  float vp0 = vc5 * vt0 + vc4;   (local)
      82  vp0 = vp0 * vt0 + vc3;
      85  vp0 = vp0 * vt0 + vc2;
      88  vp0 = vp0 * vt0 + vc1;
      98  float vf0 = vt0 * vp0 + vs0;

  scalar-p5-x4-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
      93  float vp0 = vc5 * vt0 + vc4;   (local)
      98  vp0 = vp0 * vt0 + vc3;
     103  vp0 = vp0 * vt0 + vc2;
     108  vp0 = vp0 * vt0 + vc1;
     122  float vf0 = vt0 * vp0 + vs0;

  scalar-p5-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
      92  float vp0 = vc5 * vt0 + vc4;   (local)
      97  vp0 = vp0 * vt0 + vc3;
     102  vp0 = vp0 * vt0 + vc2;
     107  vp0 = vp0 * vt0 + vc1;
     121  float vf0 = vt0 * vp0 + vs0;

  scalar-p5-x4-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
      95  float vp0 = vc5 * vt0 + vc4;   (local)
     100  vp0 = vp0 * vt0 + vc3;
     105  vp0 = vp0 * vt0 + vc2;
     110  vp0 = vp0 * vt0 + vc1;
     124  float vf0 = vt0 * vp0 + vs0;
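In these scalar exp-sum kernels the polynomial stops at vc1 rather than vc0, and the last listed row is vf0 = vt0 * vp0 + vs0, presumably reconstructing the scaled exponential with the leading term carried by vs0 and the multiply by vt0 fused into the final step. A scalar sketch of that shape, with placeholder coefficients and with t and s assumed to come from the kernel's range reduction:

    /* Sketch of the pattern listed above: degree-5 Horner down to c1,
     * then a final "vf0 = vt0 * vp0 + vs0" step.  Coefficients are
     * placeholders. */
    float raddstore_p5_sketch(float t, float s) {
      const float c5 = 0.0083f, c4 = 0.0417f, c3 = 0.1667f,
                  c2 = 0.5f, c1 = 1.0f;
      float p = c5 * t + c4;   /* "float vp0 = vc5 * vt0 + vc4" */
      p = p * t + c3;
      p = p * t + c2;
      p = p * t + c1;
      return t * p + s;        /* "float vf0 = vt0 * vp0 + vs0" */
    }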
/external/XNNPACK/src/f32-sigmoid/gen/

  scalar-p5-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
      62  float vp0 = vt0 * vc5 + vc4;   (local)
      65  vp0 = vt0 * vp0 + vc3;
      68  vp0 = vt0 * vp0 + vc2;
      71  vp0 = vt0 * vp0 + vc1;
      77  const float ve0 = vt0 * vp0 + vs0;

  avx512f-rr1-p5-scalef-div-x32.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32()
      54  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);   (local)
      57  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
      60  vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
      63  vp0 = _mm512_fmadd_ps(vp0, vt0, vc1);
      66  vp0 = _mm512_fmadd_ps(vp0, vt0, vone);
      69  const __m512 ve0 = _mm512_scalef_ps(vp0, vn0);
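The sigmoid kernels reuse the same degree-5 exp core; the rows above end at ve0, and the "div" in the kernel names points to a final division (forming e / (e + 1) for the appropriate half-line) that falls outside the listed lines. A scalar sketch under that assumption; the step after ve0 is inferred from the kernel names, not from the rows shown, and the coefficients are placeholders:

    /* Sketch of the scalar sigmoid pattern: the listed rows compute
     * e = t*p + s; the trailing e / (e + 1) step is an assumption based on
     * the "_div_" suffix in the kernel names. */
    float sigmoid_p5_sketch(float t, float s) {
      const float c5 = 0.0083f, c4 = 0.0417f, c3 = 0.1667f,
                  c2 = 0.5f, c1 = 1.0f;
      float p = t * c5 + c4;       /* "float vp0 = vt0 * vc5 + vc4" */
      p = t * p + c3;
      p = t * p + c2;
      p = t * p + c1;
      const float e = t * p + s;   /* "const float ve0 = vt0 * vp0 + vs0" */
      return e / (e + 1.0f);       /* assumed reconstruction step */
    }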