/external/XNNPACK/src/f32-velu/gen/ |
D | velu-scalar-rr2-p6-x2.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x2():
    59   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    62   vt0 = vn0 * vminus_ln2_lo + vt0;
    67   vt0 = 0.0f;
    74   float vp0 = vc6 * vt0 + vc5;
    77   vp0 = vp0 * vt0 + vc4;
    80   vp0 = vp0 * vt0 + vc3;
    83   vp0 = vp0 * vt0 + vc2;
    86   vp0 *= vt0;
    89   vt0 *= vs0;
    94   vp0 = vp0 * vt0 + vt0;
|
D | velu-wasm-rr2-p6-x2.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x2():
    59   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    62   vt0 = vn0 * vminus_ln2_lo + vt0;
    66   float vp0 = vc6 * vt0 + vc5;
    69   vp0 = vp0 * vt0 + vc4;
    72   vp0 = vp0 * vt0 + vc3;
    75   vp0 = vp0 * vt0 + vc2;
    78   vp0 *= vt0;
    81   vt0 *= vs0;
    86   vp0 = vp0 * vt0 + vt0;
|
D | velu-scalar-rr2-p6-x3.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x3():
    64   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    68   vt0 = vn0 * vminus_ln2_lo + vt0;
    74   vt0 = 0.0f;
    85   float vp0 = vc6 * vt0 + vc5;
    89   vp0 = vp0 * vt0 + vc4;
    93   vp0 = vp0 * vt0 + vc3;
    97   vp0 = vp0 * vt0 + vc2;
    101  vp0 *= vt0;
    105  vt0 *= vs0;
    112  vp0 = vp0 * vt0 + vt0;
|
D | velu-wasm-rr2-p6-x3.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x3():
    64   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    68   vt0 = vn0 * vminus_ln2_lo + vt0;
    73   float vp0 = vc6 * vt0 + vc5;
    77   vp0 = vp0 * vt0 + vc4;
    81   vp0 = vp0 * vt0 + vc3;
    85   vp0 = vp0 * vt0 + vc2;
    89   vp0 *= vt0;
    93   vt0 *= vs0;
    100  vp0 = vp0 * vt0 + vt0;
|
D | velu-scalar-rr2-p6-x4.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x4():
    69   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    74   vt0 = vn0 * vminus_ln2_lo + vt0;
    81   vt0 = 0.0f;
    96   float vp0 = vc6 * vt0 + vc5;
    101  vp0 = vp0 * vt0 + vc4;
    106  vp0 = vp0 * vt0 + vc3;
    111  vp0 = vp0 * vt0 + vc2;
    116  vp0 *= vt0;
    121  vt0 *= vs0;
    130  vp0 = vp0 * vt0 + vt0;
|
D | velu-scalar-rr2-lut16-p3-x2.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2():
    61   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    66   vt0 = vn0 * vminus_ln2_lo + vt0;
    69   vt0 = 0.0f;
    77   float vp0 = vc3 * vt0 + vc2;
    80   vp0 *= vt0;
    83   vt0 *= vs0;
    88   vp0 = vp0 * vt0 + vt0;
|
D | velu-wasm-rr2-p6-x4.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x4():
    69   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    74   vt0 = vn0 * vminus_ln2_lo + vt0;
    80   float vp0 = vc6 * vt0 + vc5;
    85   vp0 = vp0 * vt0 + vc4;
    90   vp0 = vp0 * vt0 + vc3;
    95   vp0 = vp0 * vt0 + vc2;
    100  vp0 *= vt0;
    105  vt0 *= vs0;
    114  vp0 = vp0 * vt0 + vt0;
|
D | velu-wasm-rr2-lut16-p3-x2.c | in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2():
    61   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    66   vt0 = vn0 * vminus_ln2_lo + vt0;
    69   float vp0 = vc3 * vt0 + vc2;
    72   vp0 *= vt0;
    75   vt0 *= vs0;
    80   vp0 = vp0 * vt0 + vt0;
|
D | velu-scalar-rr2-p6-x5.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x5():
    74   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    80   vt0 = vn0 * vminus_ln2_lo + vt0;
    88   vt0 = 0.0f;
    107  float vp0 = vc6 * vt0 + vc5;
    113  vp0 = vp0 * vt0 + vc4;
    119  vp0 = vp0 * vt0 + vc3;
    125  vp0 = vp0 * vt0 + vc2;
    131  vp0 *= vt0;
    137  vt0 *= vs0;
    148  vp0 = vp0 * vt0 + vt0;
|
D | velu-scalar-rr2-lut16-p3-x3.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3():
    67   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    74   vt0 = vn0 * vminus_ln2_lo + vt0;
    77   vt0 = 0.0f;
    90   float vp0 = vc3 * vt0 + vc2;
    94   vp0 *= vt0;
    98   vt0 *= vs0;
    105  vp0 = vp0 * vt0 + vt0;
|
D | velu-wasm-rr2-p6-x5.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x5():
    74   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    80   vt0 = vn0 * vminus_ln2_lo + vt0;
    87   float vp0 = vc6 * vt0 + vc5;
    93   vp0 = vp0 * vt0 + vc4;
    99   vp0 = vp0 * vt0 + vc3;
    105  vp0 = vp0 * vt0 + vc2;
    111  vp0 *= vt0;
    117  vt0 *= vs0;
    128  vp0 = vp0 * vt0 + vt0;
|
D | velu-scalar-rr2-p6-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x6():
    79   float vt0 = vn0 * vminus_ln2_hi + vz0;   (local)
    86   vt0 = vn0 * vminus_ln2_lo + vt0;
    95   vt0 = 0.0f;
    118  float vp0 = vc6 * vt0 + vc5;
    125  vp0 = vp0 * vt0 + vc4;
    132  vp0 = vp0 * vt0 + vc3;
    139  vp0 = vp0 * vt0 + vc2;
    146  vp0 *= vt0;
    153  vt0 *= vs0;
    166  vp0 = vp0 * vt0 + vt0;
|
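All of the rr2-p6 entries above use vt0 the same way: it holds the reduced argument t = z - n*ln2 produced by a two-step ("rr2") Cody-Waite reduction, and is then pushed through a degree-6 ("p6") Horner polynomial for expm1. The lut16-p3 files differ only in pairing a 16-entry exp2 table with a degree-3 polynomial, and the wasm variants handle saturation up front, which is why the vt0 = 0.0f reset shows up only in the scalar files. Below is a minimal scalar sketch of the p6 scheme, not the generated kernel: the helper name elu_rr2_p6_sketch is made up, and the saturation cutoff and the Taylor coefficients stand in for XNNPACK's tuned minimax constants.

#include <stdint.h>
#include <string.h>

/* Bit-cast helpers (the kernels use XNNPACK's fp32_to_bits/fp32_from_bits). */
static uint32_t as_u32(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
static float as_f32(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

/* ELU: beta*x for x >= 0, alpha*(exp(prescale*x) - 1) for x < 0.
 * Sketch of the scalar rr2-p6 scheme; coefficients are Taylor terms of
 * exp(t) - 1, not the minimax ones in the generated kernels. */
static float elu_rr2_p6_sketch(float x, float prescale, float alpha, float beta) {
  const float magic_bias   = 0x1.8000FEp23f;   /* rounds n and encodes it in the mantissa */
  const float log2e        = 0x1.715476p+0f;
  const float minus_ln2_hi = -0x1.62E430p-1f;  /* hi/lo split of -ln(2): the "rr2" part */
  const float minus_ln2_lo = 0x1.05C610p-29f;
  const float sat_cutoff   = -0x1.154246p+4f;  /* below this, expm1(z) is effectively -1 */

  const float z = x * prescale;

  /* n = round(z / ln2); s = 2**n from the shifted bit pattern of n + magic_bias. */
  float n = z * log2e + magic_bias;
  float s = as_f32(as_u32(n) << 23);
  n -= magic_bias;

  /* t = z - n*ln2, done in two steps for extra precision (the two vt0 lines). */
  float t = n * minus_ln2_hi + z;
  t = n * minus_ln2_lo + t;

  /* Saturate: for very negative z, force s and t to 0 (the "vt0 = 0.0f" lines). */
  if (z <= sat_cutoff) { s = 0.0f; t = 0.0f; }

  /* Degree-6 polynomial in Horner form: p ends up as t/2 + t^2/6 + ... + t^5/720. */
  float p = (1.0f / 720.0f) * t + (1.0f / 120.0f);
  p = p * t + (1.0f / 24.0f);
  p = p * t + (1.0f / 6.0f);
  p = p * t + 0.5f;
  p *= t;

  /* expm1(z) = s*(t + p*t) + (s - 1); scale the negative branch by alpha. */
  t *= s;
  p = p * t + t;
  const float e = (p + (s - 1.0f)) * alpha;

  return x < 0.0f ? e : x * beta;
}

For example, elu_rr2_p6_sketch(-1.0f, 1.0f, 1.0f, 1.0f) should come out near exp(-1) - 1, about -0.632.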
/external/XNNPACK/src/u8-clamp/ |
D | scalar-x4.c | in xnn_u8_clamp_ukernel__scalar_x4():
    23   uint8_t vt0 = x[0];   (local)
    29   vt0 = XNN_UNPREDICTABLE(vt0 < voutput_min) ? voutput_min : vt0;
    34   vt0 = XNN_UNPREDICTABLE(vt0 > voutput_max) ? voutput_max : vt0;
    39   y[0] = vt0;
|
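Here vt0 is simply the byte being clamped; XNN_UNPREDICTABLE marks the comparisons as data-dependent so the compiler prefers branchless selects. A plain-C sketch of the same pattern, with a hypothetical helper name, no unrolling, and no hint macro:

#include <stddef.h>
#include <stdint.h>

/* Clamp each byte of x[0..n) into [output_min, output_max] and write to y.
 * The generated kernel unrolls by 4 and wraps the comparisons in
 * XNN_UNPREDICTABLE to encourage conditional moves. */
static void u8_clamp_sketch(size_t n, const uint8_t* x, uint8_t* y,
                            uint8_t output_min, uint8_t output_max) {
  for (size_t i = 0; i < n; i++) {
    uint8_t vt = x[i];
    vt = vt < output_min ? output_min : vt;   /* lower bound */
    vt = vt > output_max ? output_max : vt;   /* upper bound */
    y[i] = vt;
  }
}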
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | scalar-p5-x2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2():
    71   float vt0 = vn0 * vminus_ln2_hi + vx0;   (local)
    74   vt0 = vn0 * vminus_ln2_lo + vt0;
    78   float vp0 = vc5 * vt0 + vc4;
    81   vp0 = vp0 * vt0 + vc3;
    84   vp0 = vp0 * vt0 + vc2;
    87   vp0 = vp0 * vt0 + vc1;
    94   vt0 *= vs0;
    97   float vf0 = vt0 * vp0 + vs0;
|
D | scalar-p5-x2-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2():
    72   float vt0 = vn0 * vminus_ln2_hi + vx0;   (local)
    75   vt0 = vn0 * vminus_ln2_lo + vt0;
    79   float vp0 = vc5 * vt0 + vc4;
    82   vp0 = vp0 * vt0 + vc3;
    85   vp0 = vp0 * vt0 + vc2;
    88   vp0 = vp0 * vt0 + vc1;
    95   vt0 *= vs0;
    98   float vf0 = vt0 * vp0 + vs0;
|
D | scalar-p5-x4-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2():
    82   float vt0 = vn0 * vminus_ln2_hi + vx0;   (local)
    87   vt0 = vn0 * vminus_ln2_lo + vt0;
    93   float vp0 = vc5 * vt0 + vc4;
    98   vp0 = vp0 * vt0 + vc3;
    103  vp0 = vp0 * vt0 + vc2;
    108  vp0 = vp0 * vt0 + vc1;
    117  vt0 *= vs0;
    122  float vf0 = vt0 * vp0 + vs0;
|
D | scalar-p5-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4():
    81   float vt0 = vn0 * vminus_ln2_hi + vx0;   (local)
    86   vt0 = vn0 * vminus_ln2_lo + vt0;
    92   float vp0 = vc5 * vt0 + vc4;
    97   vp0 = vp0 * vt0 + vc3;
    102  vp0 = vp0 * vt0 + vc2;
    107  vp0 = vp0 * vt0 + vc1;
    116  vt0 *= vs0;
    121  float vf0 = vt0 * vp0 + vs0;
|
D | scalar-p5-x4-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4():
    84   float vt0 = vn0 * vminus_ln2_hi + vx0;   (local)
    89   vt0 = vn0 * vminus_ln2_lo + vt0;
    95   float vp0 = vc5 * vt0 + vc4;
    100  vp0 = vp0 * vt0 + vc3;
    105  vp0 = vp0 * vt0 + vc2;
    110  vp0 = vp0 * vt0 + vc1;
    119  vt0 *= vs0;
    124  float vf0 = vt0 * vp0 + vs0;
|
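The scalar-p5 files above all compute exp(x - max) for each element, store it, and add it to a running sum; the -accN variants only differ in splitting that sum over 2 or 4 accumulators to shorten the dependency chain. vt0 is again the reduced argument, and the degree-5 polynomial approximates exp(t) itself rather than expm1. A self-contained sketch follows; the helper name is made up, the Taylor coefficients stand in for the tuned constants, and the denormal cutoff value is an assumption.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static uint32_t as_u32(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
static float as_f32(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

/* For each element: f = exp(input[i] - max); store f and add it to the sum.
 * One element per iteration; the generated kernels unroll by 2 or 4. */
static float raddstoreexp_sketch(size_t n, const float* input, float* output,
                                 float max) {
  const float magic_bias    = 0x1.8000FEp23f;
  const float log2e         = 0x1.715476p+0f;
  const float minus_ln2_hi  = -0x1.62E430p-1f;
  const float minus_ln2_lo  = 0x1.05C610p-29f;
  const float denorm_cutoff = -0x1.5D589Ep+6f;  /* assumed: flush tiny exp() to 0 */

  float acc = 0.0f;
  for (size_t i = 0; i < n; i++) {
    const float x = input[i] - max;            /* <= 0 when max is the true maximum */

    float vn = x * log2e + magic_bias;         /* n = round(x / ln2) */
    const float s = as_f32(as_u32(vn) << 23);  /* s = 2**n */
    vn -= magic_bias;

    float t = vn * minus_ln2_hi + x;           /* t = x - n*ln2, two-step reduction */
    t = vn * minus_ln2_lo + t;

    /* Degree-5 polynomial: p = 1 + t/2 + t^2/6 + t^3/24 + t^4/120 (Taylor stand-ins). */
    float p = (1.0f / 120.0f) * t + (1.0f / 24.0f);
    p = p * t + (1.0f / 6.0f);
    p = p * t + 0.5f;
    p = p * t + 1.0f;

    t *= s;
    float f = t * p + s;                       /* exp(x) ~= s + (s*t)*p */
    if (x < denorm_cutoff) f = 0.0f;           /* the bit trick is invalid far below zero */

    output[i] = f;
    acc += f;
  }
  return acc;
}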
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | scalar-p5-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2():
    56   float vt0 = vn0 * vln2_hi + vz0;   (local)
    59   vt0 = vn0 * vln2_lo + vt0;
    62   float vp0 = vt0 * vc5 + vc4;
    65   vp0 = vt0 * vp0 + vc3;
    68   vp0 = vt0 * vp0 + vc2;
    71   vp0 = vt0 * vp0 + vc1;
    74   vt0 *= vs0;
    77   const float ve0 = vt0 * vp0 + vs0;
|
D | scalar-p5-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4():
    66   float vt0 = vn0 * vln2_hi + vz0;   (local)
    71   vt0 = vn0 * vln2_lo + vt0;
    76   float vp0 = vt0 * vc5 + vc4;
    81   vp0 = vt0 * vp0 + vc3;
    86   vp0 = vt0 * vp0 + vc2;
    91   vp0 = vt0 * vp0 + vc1;
    96   vt0 *= vs0;
    101  const float ve0 = vt0 * vp0 + vs0;
|
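The sigmoid kernels reuse the same reduction and degree-5 polynomial to get e close to exp(-|x|) (note the +ln2 constants here, since n is non-positive), then finish with a single division, f = e / (e + 1), and reflect the result for positive inputs. The sketch below keeps the sign explicit instead of folding it into the constants as the generated code does; the helper name and the cutoff value are assumptions, and the coefficients are again Taylor stand-ins.

#include <math.h>
#include <stdint.h>
#include <string.h>

static uint32_t as_u32(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
static float as_f32(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

/* sigmoid(x) = e / (e + 1) with e = exp(-|x|), reflected for x > 0. */
static float sigmoid_p5_div_sketch(float x) {
  const float magic_bias    = 0x1.8000FEp23f;
  const float log2e         = 0x1.715476p+0f;
  const float minus_ln2_hi  = -0x1.62E430p-1f;
  const float minus_ln2_lo  = 0x1.05C610p-29f;
  const float denorm_cutoff = 0x1.5D589Ep+6f;   /* assumed: |x| above this gives 0 or 1 */

  const float z = -fabsf(x);                    /* evaluate on the non-positive half */

  float n = z * log2e + magic_bias;             /* n = round(z / ln2) */
  const float s = as_f32(as_u32(n) << 23);      /* s = 2**n */
  n -= magic_bias;

  float t = n * minus_ln2_hi + z;               /* t = z - n*ln2 */
  t = n * minus_ln2_lo + t;

  float p = (1.0f / 120.0f) * t + (1.0f / 24.0f);
  p = p * t + (1.0f / 6.0f);
  p = p * t + 0.5f;
  p = p * t + 1.0f;

  t *= s;
  const float e = t * p + s;                    /* e ~= exp(-|x|) */
  float f = e / (e + 1.0f);                     /* sigmoid(-|x|): the "div" step */

  if (fabsf(x) > denorm_cutoff) f = 0.0f;       /* exp underflow: sigmoid(-|x|) ~= 0 */
  return x > 0.0f ? 1.0f - f : f;               /* reflect for positive inputs */
}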
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x8.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8():
    65   __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);   (local)
    67   vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    70   __m256 vp0 = _mm256_fmadd_ps(vc5, vt0, vc4);
    72   vp0 = _mm256_fmadd_ps(vp0, vt0, vc3);
    74   vp0 = _mm256_fmadd_ps(vp0, vt0, vc2);
    76   vp0 = _mm256_fmadd_ps(vp0, vt0, vc1);
    82   vt0 = _mm256_mul_ps(vt0, vs0);
    84   __m256 vf0 = _mm256_fmadd_ps(vt0, vp0, vs0);
|
D | avx512f-p5-scalef-x16.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16():
    54   __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_hi, vx0);   (local)
    56   vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    59   __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    61   vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
    63   vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
    65   vp0 = _mm512_fmadd_ps(vp0, vt0, vc1);
    67   vp0 = _mm512_fmadd_ps(vp0, vt0, vc0);
|
D | avx2-p5-x16.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16():
    70   __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);   (local)
    73   vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    77   __m256 vp0 = _mm256_fmadd_ps(vc5, vt0, vc4);
    80   vp0 = _mm256_fmadd_ps(vp0, vt0, vc3);
    83   vp0 = _mm256_fmadd_ps(vp0, vt0, vc2);
    86   vp0 = _mm256_fmadd_ps(vp0, vt0, vc1);
    93   vt0 = _mm256_mul_ps(vt0, vs0);
    96   __m256 vf0 = _mm256_fmadd_ps(vt0, vp0, vs0);
|
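The AVX2 entries are the same p5 recipe expressed with FMA intrinsics: vt0 is built by two _mm256_fmadd_ps steps, the polynomial is a chain of fmadds, and the result is reconstructed as s + (s*t)*p. A hedged, self-contained sketch of that core follows; the real kernels additionally subtract the row maximum before this and multiply by the scale afterwards, the function name is made up, the coefficients are Taylor stand-ins, and the cutoff value is an assumption. Compile with -mavx2 -mfma.

#include <immintrin.h>

/* Vectorized exp(x) for x <= 0 (as after max-subtraction), AVX2 + FMA. */
static __m256 exp_avx2_p5_sketch(__m256 vx) {
  const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vlog2e        = _mm256_set1_ps(0x1.715476p+0f);
  const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E430p-1f);
  const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C610p-29f);
  const __m256 vc5 = _mm256_set1_ps(1.0f / 120.0f);
  const __m256 vc4 = _mm256_set1_ps(1.0f / 24.0f);
  const __m256 vc3 = _mm256_set1_ps(1.0f / 6.0f);
  const __m256 vc2 = _mm256_set1_ps(0.5f);
  const __m256 vc1 = _mm256_set1_ps(1.0f);
  const __m256 vdenorm_cutoff = _mm256_set1_ps(-0x1.5D589Ep+6f);

  /* n = round(x / ln2); s = 2**n via the magic-bias bit trick. */
  __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
  const __m256 vs = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn), 23));
  vn = _mm256_sub_ps(vn, vmagic_bias);

  /* t = x - n*ln2, two fmadds (the two vt0 lines in the listings). */
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);

  /* p ~= (exp(t) - 1) / t, degree-5 Horner chain of fmadds. */
  __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
  vp = _mm256_fmadd_ps(vp, vt, vc3);
  vp = _mm256_fmadd_ps(vp, vt, vc2);
  vp = _mm256_fmadd_ps(vp, vt, vc1);

  /* exp(x) ~= s + (s*t)*p; zero lanes whose result would be subnormal. */
  vt = _mm256_mul_ps(vt, vs);
  __m256 vf = _mm256_fmadd_ps(vt, vp, vs);
  vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
  return vf;
}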
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x16.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16():
    52   __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_hi, vx0);   (local)
    54   vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    57   __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    59   vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
    61   vp0 = _mm512_fmadd_ps(vp0, vt0, vc2);
    63   vp0 = _mm512_fmadd_ps(vp0, vt0, vc1);
    65   vp0 = _mm512_fmadd_ps(vp0, vt0, vc0);
|
D | avx2-p5-x8.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x8():
    58   __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);   (local)
    60   vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    63   __m256 vp0 = _mm256_fmadd_ps(vc5, vt0, vc4);
    65   vp0 = _mm256_fmadd_ps(vp0, vt0, vc3);
    67   vp0 = _mm256_fmadd_ps(vp0, vt0, vc2);
    69   vp0 = _mm256_fmadd_ps(vp0, vt0, vc1);
    71   vp0 = _mm256_fmadd_ps(vp0, vt0, vc0);
|
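The p5-scalef entries (in both the vscaleexpminusmax and vscaleextexp groups) evaluate the polynomial all the way down to vc0, i.e. they approximate exp(t) including its constant term, because the power-of-two factor is applied afterwards via scalef-style exponent handling rather than being baked into a bit-shifted scale. A sketch of that flavour for AVX-512, with the same caveats as above (made-up name, Taylor coefficients); compile with -mavx512f.

#include <immintrin.h>

/* exp(x) with VSCALEFPS applying the 2**n factor; no magic-bias bit trick and
 * no denormal cutoff needed, since scalef saturates gracefully on its own. */
static __m512 exp_avx512_scalef_sketch(__m512 vx) {
  const __m512 vlog2e        = _mm512_set1_ps(0x1.715476p+0f);
  const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E430p-1f);
  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C610p-29f);
  const __m512 vc5 = _mm512_set1_ps(1.0f / 120.0f);
  const __m512 vc4 = _mm512_set1_ps(1.0f / 24.0f);
  const __m512 vc3 = _mm512_set1_ps(1.0f / 6.0f);
  const __m512 vc2 = _mm512_set1_ps(0.5f);
  const __m512 vc1 = _mm512_set1_ps(1.0f);
  const __m512 vc0 = _mm512_set1_ps(1.0f);

  /* n = round(x / ln2); kept as a float, since VSCALEFPS takes it directly. */
  const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);

  /* t = x - n*ln2, two-step reduction (the two vt0 lines). */
  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);

  /* p ~= exp(t): degree-5 Horner evaluation including the constant term vc0. */
  __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
  vp = _mm512_fmadd_ps(vp, vt, vc3);
  vp = _mm512_fmadd_ps(vp, vt, vc2);
  vp = _mm512_fmadd_ps(vp, vt, vc1);
  vp = _mm512_fmadd_ps(vp, vt, vc0);

  /* exp(x) = p * 2**n. */
  return _mm512_scalef_ps(vp, vn);
}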