/external/XNNPACK/src/u8-clamp/ |
D | scalar.c |
    26  uint8_t vt3 = x[3];  in xnn_u8_clamp_ukernel__scalar() local
    32  vt3 = vt3 < voutput_min ? voutput_min : vt3;  in xnn_u8_clamp_ukernel__scalar()
    37  vt3 = vt3 > voutput_max ? voutput_max : vt3;  in xnn_u8_clamp_ukernel__scalar()
    42  y[3] = vt3;  in xnn_u8_clamp_ukernel__scalar()
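These are the four unrolled lanes of a scalar byte-clamp microkernel: load, clamp below, clamp above, store. A minimal sketch of the same pattern written as a plain loop (the signature here is illustrative, not the actual XNNPACK prototype):

#include <stddef.h>
#include <stdint.h>

// Hedged sketch of the u8 clamp pattern above: every input byte is
// clamped to [voutput_min, voutput_max] and written to the output.
static void u8_clamp_scalar(size_t n, const uint8_t* x, uint8_t* y,
                            uint8_t voutput_min, uint8_t voutput_max) {
  for (size_t i = 0; i < n; i++) {
    uint8_t vt = x[i];                          // load
    vt = vt < voutput_min ? voutput_min : vt;   // clamp below
    vt = vt > voutput_max ? voutput_max : vt;   // clamp above
    y[i] = vt;                                  // store
  }
}

The generated kernel simply unrolls this loop by four, which is why vt0 through vt3 appear as separate locals.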
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | scalar-p5-x4.c |
    84  float vt3 = vn3 * vminus_ln2_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4() local
    89  vt3 = vn3 * vminus_ln2_lo + vt3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    95  float vp3 = vc5 * vt3 + vc4;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    100  vp3 = vp3 * vt3 + vc3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    105  vp3 = vp3 * vt3 + vc2;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    110  vp3 = vp3 * vt3 + vc1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    119  vt3 *= vs3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    124  float vf3 = vt3 * vp3 + vs3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
|
D | scalar-p5-x4-acc2.c |
    85  float vt3 = vn3 * vminus_ln2_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2() local
    90  vt3 = vn3 * vminus_ln2_lo + vt3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    96  float vp3 = vc5 * vt3 + vc4;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    101  vp3 = vp3 * vt3 + vc3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    106  vp3 = vp3 * vt3 + vc2;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    111  vp3 = vp3 * vt3 + vc1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    120  vt3 *= vs3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    125  float vf3 = vt3 * vp3 + vs3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
|
D | scalar-p5-x4-acc4.c |
    87  float vt3 = vn3 * vminus_ln2_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4() local
    92  vt3 = vn3 * vminus_ln2_lo + vt3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    98  float vp3 = vc5 * vt3 + vc4;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    103  vp3 = vp3 * vt3 + vc3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    108  vp3 = vp3 * vt3 + vc2;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    113  vp3 = vp3 * vt3 + vc1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    122  vt3 *= vs3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    127  float vf3 = vt3 * vp3 + vs3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
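The p5 variants above (and the AVX2/AVX-512 files further down) share one per-element recipe, visible in the vt3/vp3 lines: n = round(x * log2(e)), a two-step Cody-Waite subtraction of n*ln(2) to form t, a degree-5 Horner polynomial in t, and reconstruction of s * e^t via vt *= vs; vf = vt*vp + vs. A hedged scalar sketch of that recipe, using Taylor coefficients and ldexpf for the 2^n scale (the production kernels use minimax-fit constants and a magic-bias bit trick, so this is a structural illustration only):

#include <math.h>

// Hedged sketch of the p5 exp evaluation pattern in the files above.
// Computes e^x for one float; x is assumed non-positive (already reduced
// by the row maximum), so the result lies in (0, 1].
static float exp_p5_scalar(float x) {
  // Cody-Waite split of ln(2): the hi part has trailing zero bits, so n*hi is exact.
  const float vlog2e        = 0x1.715476p+0f;
  const float vminus_ln2_hi = -0x1.62E400p-1f;
  const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;
  // Taylor coefficients of e^t; the production kernels use minimax fits.
  const float vc5 = 1.0f / 120.0f, vc4 = 1.0f / 24.0f, vc3 = 1.0f / 6.0f;
  const float vc2 = 0.5f, vc1 = 1.0f;

  const float vn = nearbyintf(x * vlog2e);   // n = round(x / ln(2))
  const float vs = ldexpf(1.0f, (int)vn);    // s = 2^n
  float vt = vn * vminus_ln2_hi + x;         // t = x - n*ln(2), hi step
  vt = vn * vminus_ln2_lo + vt;              //                  lo step

  float vp = vc5 * vt + vc4;                 // degree-5 Horner polynomial
  vp = vp * vt + vc3;
  vp = vp * vt + vc2;
  vp = vp * vt + vc1;

  vt *= vs;                                  // reconstruct s * e^t
  return vt * vp + vs;                       //   = s + (s*t) * p(t)
}

The two-step subtraction keeps t accurate even though n*ln(2) is not exactly representable, and the final vt*vp + vs form avoids computing 1 + t*p(t) before the multiply by s.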
|
D | scalar-lut64-p2-x4-acc2.c |
    107  float vt3 = vn3 * vminus_ln2_o64_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2() local
    112  vt3 = vn3 * vminus_ln2_o64_lo + vt3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
    118  float vp3 = vt3 * vc2;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
    123  vp3 = vp3 * vt3 + vt3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
|
D | scalar-lut64-p2-x4.c |
    106  float vt3 = vn3 * vminus_ln2_o64_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4() local
    111  vt3 = vn3 * vminus_ln2_o64_lo + vt3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
    117  float vp3 = vt3 * vc2;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
    122  vp3 = vp3 * vt3 + vt3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
|
D | scalar-lut64-p2-x4-acc4.c |
    109  float vt3 = vn3 * vminus_ln2_o64_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4() local
    114  vt3 = vn3 * vminus_ln2_o64_lo + vt3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
    120  float vp3 = vt3 * vc2;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
    125  vp3 = vp3 * vt3 + vt3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
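The lut64-p2 variants trade polynomial degree for a table: n = round(64*x/ln(2)), the scale 2^(n/64) splits into 2^(n>>6) times a 64-entry table of 2^(k/64), and the remaining |t| <= ln(2)/128 is small enough for the degree-2 step seen above (vp3 = vt3*vc2; vp3 = vp3*vt3 + vt3). A hedged scalar sketch, building the table with exp2f and assuming c2 = 0.5 (XNNPACK ships a precomputed shared table, xnn_table_exp2_k_over_64, and a minimax c2):

#include <math.h>

// Hedged sketch of the lut64-p2 exp pattern in the files above.
// x is assumed non-positive and above the underflow cutoff.
static float exp_lut64_p2_scalar(float x) {
  // 64-entry table of 2^(k/64), built lazily here (not thread-safe; a sketch only).
  static float table[64];
  static int initialized = 0;
  if (!initialized) {
    for (int k = 0; k < 64; k++) table[k] = exp2f((float)k / 64.0f);
    initialized = 1;
  }

  const float vlog2e_x64        = 0x1.715476p+6f;   // 64 / ln(2)
  const float vminus_ln2_o64_hi = -0x1.62E43p-7f;   // -(ln(2)/64), split into
  const float vminus_ln2_o64_lo = 0x1.05C61p-35f;   //   a hi and a lo part
  const float vc2 = 0.5f;                           // assumed; real kernel uses a minimax fit

  const int vn = (int)nearbyintf(x * vlog2e_x64);   // n = round(64*x/ln(2))
  // 2^(n/64) = 2^(n>>6) * 2^((n&63)/64); relies on arithmetic >> for negative n.
  const float vs = ldexpf(table[vn & 63], vn >> 6);
  const float vnf = (float)vn;

  float vt = vnf * vminus_ln2_o64_hi + x;           // t = x - n*(ln(2)/64)
  vt = vnf * vminus_ln2_o64_lo + vt;

  float vp = vt * vc2;                              // p = t + c2*t^2 (degree 2 suffices
  vp = vp * vt + vt;                                //   because |t| <= ln(2)/128)

  return vs + vs * vp;                              // 2^(n/64) * e^t
}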
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | scalar-p5-div-x4.c |
    91  float vt3 = vn3 * vln2_hi + vz3;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4() local
    96  vt3 = vn3 * vln2_lo + vt3;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
    103  float vp3 = vt3 * vc5 + vc4;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
    108  vp3 = vt3 * vp3 + vc3;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
    113  vp3 = vt3 * vp3 + vc2;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
    118  vp3 = vt3 * vp3 + vc1;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
    127  vt3 *= vs3;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
    132  const float ve3 = vt3 * vp3 + vs3;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
|
D | scalar-lut64-p2-div-x4.c |
    112  float vt3 = vn3 * vln2_o64_hi + vz3;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4() local
    117  vt3 = vn3 * vln2_o64_lo + vt3;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
    124  float vp3 = vt3 * vc2;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
    129  vp3 = vt3 - vp3 * vt3;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
|
D | avx2-rr1-p5-div-x32.c |
    90  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32() local
    96  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32()
    101  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32()
    106  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32()
    111  vp3 = _mm256_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32()
    120  vt3 = _mm256_mul_ps(vt3, vs3);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32()
    125  const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32()
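These sigmoid kernels reuse the same exp evaluation on the non-positive half-line (ve3 = vt3*vp3 + vs3 above is the exp reconstruction) and then finish with one division and a sign-based select, since sigmoid(x) = 1 - sigmoid(-x). A hedged scalar sketch of that outer structure, with expf standing in for the inlined polynomial:

#include <math.h>

// Hedged sketch of the sigmoid structure behind the p5-div / lut64-p2-div
// kernels above: evaluate exp on a non-positive argument (never overflows),
// divide once, then mirror for positive inputs.
static float sigmoid_via_exp(float x) {
  const float ve = expf(-fabsf(x));   // the kernels inline the polynomial exp here
  const float vd = ve + 1.0f;
  float vf = ve / vd;                 // sigmoid(-|x|)
  if (x > 0.0f) {
    vf = 1.0f - vf;                   // sigmoid(x) = 1 - sigmoid(-x)
  }
  return vf;
}

Working on -|x| keeps the exponential in (0, 1], so the division is always well conditioned; the vector kernels implement the final mirror with a sign-based blend instead of a branch.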
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x32.c |
    83  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32() local
    88  vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
    94  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
    99  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
    104  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
    109  vp3 = _mm256_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
    118  vt3 = _mm256_mul_ps(vt3, vs3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
    123  __m256 vf3 = _mm256_fmadd_ps(vt3, vp3, vs3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
|
D | avx2-p5-x40.c |
    88  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40() local
    94  vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
    101  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
    107  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
    113  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
    119  vp3 = _mm256_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
    129  vt3 = _mm256_mul_ps(vt3, vs3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
    135  __m256 vf3 = _mm256_fmadd_ps(vt3, vp3, vs3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
|
D | avx512f-p5-scalef-x64.c |
    66  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x64() local
    71  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x64()
    77  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x64()
    82  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x64()
    87  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x64()
    92  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x64()
    97  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x64()
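In the avx512f-p5-scalef files the Horner chain runs one step further, down to a constant term vc0, and (as the file names suggest) the 2^n scaling is applied with _mm512_scalef_ps rather than materializing 2^n as a float, which keeps the scale factor itself from overflowing or flushing to zero. A hedged sketch of one 16-lane evaluation, again with Taylor coefficients standing in for the kernels' minimax constants (requires AVX-512F, e.g. compile with -mavx512f):

#include <immintrin.h>

// Hedged sketch of the p5/scalef exp pattern in the files above:
// exp(x) ~= p(t) * 2^n with n = round(x/ln(2)), evaluated for 16 floats.
static __m512 exp_p5_scalef(__m512 vx) {
  const __m512 vlog2e        = _mm512_set1_ps(0x1.715476p+0f);
  const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E400p-1f);
  const __m512 vminus_ln2_lo = _mm512_set1_ps(-0x1.7F7D1Cp-20f);
  // Taylor coefficients of e^t; the production kernels use minimax fits.
  const __m512 vc5 = _mm512_set1_ps(1.0f / 120.0f);
  const __m512 vc4 = _mm512_set1_ps(1.0f / 24.0f);
  const __m512 vc3 = _mm512_set1_ps(1.0f / 6.0f);
  const __m512 vc2 = _mm512_set1_ps(0.5f);
  const __m512 vc1 = _mm512_set1_ps(1.0f);
  const __m512 vc0 = _mm512_set1_ps(1.0f);

  // n = round(x / ln(2)); t = x - n*ln(2) via the two-step Cody-Waite subtraction.
  const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);

  // p(t) ~= e^t, evaluated all the way down to the constant term c0.
  __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
  vp = _mm512_fmadd_ps(vp, vt, vc3);
  vp = _mm512_fmadd_ps(vp, vt, vc2);
  vp = _mm512_fmadd_ps(vp, vt, vc1);
  vp = _mm512_fmadd_ps(vp, vt, vc0);

  // scalef applies the 2^n factor without ever representing 2^n on its own.
  return _mm512_scalef_ps(vp, vn);
}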
|
D | avx2-p5-x48.c |
    93  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48() local
    100  vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
    108  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
    115  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
    122  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
    129  vp3 = _mm256_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
    140  vt3 = _mm256_mul_ps(vt3, vs3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
    147  __m256 vf3 = _mm256_fmadd_ps(vt3, vp3, vs3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48()
|
D | avx512f-p5-scalef-x80.c |
    69  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80() local
    75  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
    82  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
    88  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
    94  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
    100  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
    106  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
|
D | avx512f-p5-scalef-x96.c |
    72  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96() local
    79  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
    87  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
    94  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
    101  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
    108  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
    115  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
|
D | avx2-p5-x56.c |
    98  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56() local
    106  vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
    115  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
    123  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
    131  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
    139  vp3 = _mm256_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
    151  vt3 = _mm256_mul_ps(vt3, vs3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
    159  __m256 vf3 = _mm256_fmadd_ps(vt3, vp3, vs3);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
|
D | avx512f-p5-scalef-x112.c |
    75  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112() local
    83  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
    92  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
    100  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
    108  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
    116  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
    124  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x64.c |
    61  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64() local
    66  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64()
    72  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64()
    77  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64()
    82  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64()
    87  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64()
    92  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64()
|
D | avx512f-p5-scalef-x80.c |
    63  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() local
    69  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
    76  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
    82  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
    88  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
    94  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
    100  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
|
D | avx2-p5-x32.c |
    67  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32() local
    72  vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32()
    78  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32()
    83  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32()
    88  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32()
    93  vp3 = _mm256_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32()
    98  vp3 = _mm256_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32()
|
D | avx512f-p5-scalef-x96.c |
    65  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() local
    72  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
    80  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
    87  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
    94  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
    101  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
    108  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
|
D | avx2-p5-x40.c |
    69  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() local
    75  vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
    82  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
    88  vp3 = _mm256_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
    94  vp3 = _mm256_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
    100  vp3 = _mm256_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
    106  vp3 = _mm256_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
|
D | avx512f-p5-scalef-x112.c |
    67  __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_hi, vx3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() local
    75  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
    84  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
    92  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
    100  vp3 = _mm512_fmadd_ps(vp3, vt3, vc2);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
    108  vp3 = _mm512_fmadd_ps(vp3, vt3, vc1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
    116  vp3 = _mm512_fmadd_ps(vp3, vt3, vc0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
|
/external/libvpx/libvpx/vp8/common/mips/msa/ |
D | idct_msa.c |
    90  v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;  in idct4x4_addblk_msa() local
    100  VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);  in idct4x4_addblk_msa()
    101  SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);  in idct4x4_addblk_msa()
    102  TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);  in idct4x4_addblk_msa()
    108  ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);  in idct4x4_addblk_msa()
    183  v4i32 hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3, res0, res1, res2, res3;  in dequant_idct4x4_addblk_msa() local
    196  VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3);  in dequant_idct4x4_addblk_msa()
    197  SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);  in dequant_idct4x4_addblk_msa()
    198  TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);  in dequant_idct4x4_addblk_msa()
    204  ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);  in dequant_idct4x4_addblk_msa()
    [all …]
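Here the vt vectors hold the second (column) pass of the 4x4 inverse transform: SRARI_W4_SW(..., 3) is a rounding arithmetic shift right by 3, TRANSPOSE4x4_SW_SW returns the block to raster order, and ADD4 adds the residual to the prediction before the result is clamped to 8 bits on store. A plain-C sketch of that post-transform step only (the 1-D transform macro VP8_IDCT_1D_W is not reproduced here):

#include <stdint.h>

// Hedged sketch of the rounding / add-to-prediction step that follows the
// 4x4 inverse transform in the MSA code above: vt holds raw second-pass
// transform output, dst holds the predictor and receives the reconstruction.
static void idct4x4_round_and_add(const int32_t vt[4][4], uint8_t* dst,
                                  int dst_stride) {
  for (int r = 0; r < 4; r++) {
    for (int c = 0; c < 4; c++) {
      // SRARI by 3: add the rounding bias, then arithmetic shift
      // (relies on arithmetic >> for negative values, as all common compilers do).
      const int32_t res = (vt[r][c] + 4) >> 3;
      int32_t pix = dst[r * dst_stride + c] + res;   // add residual to prediction
      if (pix < 0) pix = 0;                          // clamp to the uint8 range
      if (pix > 255) pix = 255;
      dst[r * dst_stride + c] = (uint8_t)pix;
    }
  }
}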
|