/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | scalar-lut64-p2-x2-acc2.c |
    62  float vn1 = vx1 * vlog2e_x64 + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2() local
    75  const uint32_t ve1 = (fp32_to_bits(vn1) & UINT32_C(0xFFFFFFC0)) << 17;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
    79  const uint32_t vidx1 = fp32_to_bits(vn1) & vindex_mask;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
    86  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
    91  float vt1 = vn1 * vminus_ln2_o64_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
    94  vt1 = vn1 * vminus_ln2_o64_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
|
D | scalar-lut64-p2-x2.c |
    61  float vn1 = vx1 * vlog2e_x64 + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2() local
    74  const uint32_t ve1 = (fp32_to_bits(vn1) & UINT32_C(0xFFFFFFC0)) << 17;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
    78  const uint32_t vidx1 = fp32_to_bits(vn1) & vindex_mask;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
    85  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
    90  float vt1 = vn1 * vminus_ln2_o64_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
    93  vt1 = vn1 * vminus_ln2_o64_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
|
D | scalar-p5-x2-acc2.c |
    59  float vn1 = vx1 * vlog2e + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2() local
    64  const float vs1 = fp32_from_bits(fp32_to_bits(vn1) << 23);  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
    68  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
    73  float vt1 = vn1 * vminus_ln2_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
    76  vt1 = vn1 * vminus_ln2_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
|
D | scalar-p5-x2.c |
    58  float vn1 = vx1 * vlog2e + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2() local
    63  const float vs1 = fp32_from_bits(fp32_to_bits(vn1) << 23);  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
    67  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
    72  float vt1 = vn1 * vminus_ln2_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
    75  vt1 = vn1 * vminus_ln2_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
|
D | scalar-lut64-p2-x4-acc2.c |
    66  float vn1 = vx1 * vlog2e_x64 + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2() local
    81  const uint32_t ve1 = (fp32_to_bits(vn1) & UINT32_C(0xFFFFFFC0)) << 17;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
    87  const uint32_t vidx1 = fp32_to_bits(vn1) & vindex_mask;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
    98  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
    105  float vt1 = vn1 * vminus_ln2_o64_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
    110  vt1 = vn1 * vminus_ln2_o64_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
|
D | scalar-lut64-p2-x4.c |
    65  float vn1 = vx1 * vlog2e_x64 + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4() local
    80  const uint32_t ve1 = (fp32_to_bits(vn1) & UINT32_C(0xFFFFFFC0)) << 17;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
    86  const uint32_t vidx1 = fp32_to_bits(vn1) & vindex_mask;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
    97  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
    104  float vt1 = vn1 * vminus_ln2_o64_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
    109  vt1 = vn1 * vminus_ln2_o64_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
|
D | scalar-lut64-p2-x4-acc4.c |
    68  float vn1 = vx1 * vlog2e_x64 + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4() local
    83  const uint32_t ve1 = (fp32_to_bits(vn1) & UINT32_C(0xFFFFFFC0)) << 17;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
    89  const uint32_t vidx1 = fp32_to_bits(vn1) & vindex_mask;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
    100  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
    107  float vt1 = vn1 * vminus_ln2_o64_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
    112  vt1 = vn1 * vminus_ln2_o64_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
|
D | scalar-p5-x4.c |
    62  float vn1 = vx1 * vlog2e + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4() local
    69  const float vs1 = fp32_from_bits(fp32_to_bits(vn1) << 23);  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    75  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    82  float vt1 = vn1 * vminus_ln2_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    87  vt1 = vn1 * vminus_ln2_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
|
D | scalar-p5-x4-acc2.c |
    63  float vn1 = vx1 * vlog2e + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2() local
    70  const float vs1 = fp32_from_bits(fp32_to_bits(vn1) << 23);  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    76  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    83  float vt1 = vn1 * vminus_ln2_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    88  vt1 = vn1 * vminus_ln2_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
|
D | scalar-p5-x4-acc4.c |
    65  float vn1 = vx1 * vlog2e + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4() local
    72  const float vs1 = fp32_from_bits(fp32_to_bits(vn1) << 23);  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    78  vn1 -= vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    85  float vt1 = vn1 * vminus_ln2_hi + vx1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    90  vt1 = vn1 * vminus_ln2_lo + vt1;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
|
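The scalar kernels indexed above all share one range-reduction scheme: round n = x*log2(e) (or x*log2(e)*64 in the lut64 variants) with a magic-bias trick, rebuild the power-of-two scale from the float's bit pattern, and subtract n*ln2 in two (hi/lo) steps to leave a small residual for the polynomial. The sketch below reconstructs the p5 flavour of that reduction; the constant values are my reconstruction rather than copies from XNNPACK, the function name is illustrative, and the degree-5 polynomial that follows in the real kernels is omitted.

#include <stdint.h>
#include <string.h>

/* Stand-ins for XNNPACK's fp32_to_bits()/fp32_from_bits() bit-cast helpers. */
static inline uint32_t fp32_to_bits(float f) {
  uint32_t u;
  memcpy(&u, &f, sizeof u);
  return u;
}

static inline float fp32_from_bits(uint32_t u) {
  float f;
  memcpy(&f, &u, sizeof f);
  return f;
}

/* Range reduction in the style of the scalar-p5 kernels above:
 * exp(x) = 2^n * exp(t) with n = round(x * log2(e)) and t = x - n*ln2.
 * Valid only for the reduced n range those kernels actually produce. */
static float expminus_range_reduce(float vx, float* vs_out) {
  /* 1.5*2^23 + 127: after the add, the low mantissa bits of vn hold n + 127,
   * i.e. the biased exponent of 2^n. */
  const float vmagic_bias = 0x1.8000FEp23f;
  const float vlog2e = 0x1.715476p+0f;
  /* -ln2 split into a high part with trailing zero bits plus a low correction. */
  const float vminus_ln2_hi = -0x1.62E400p-1f;
  const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;

  float vn = vx * vlog2e + vmagic_bias;              /* n + bias, n = round(x*log2e) */
  *vs_out = fp32_from_bits(fp32_to_bits(vn) << 23);  /* vs = 2^n from the bit pattern */
  vn -= vmagic_bias;                                 /* recover n as a float          */

  float vt = vn * vminus_ln2_hi + vx;                /* t = x - n*ln2 (high part)     */
  vt = vn * vminus_ln2_lo + vt;                      /* add the low-order correction  */
  return vt;
}

In the lut64 variants, the ve1/vidx1 lines above suggest the low 6 bits of vn select a 2^(i/64) table entry while the remaining bits are shifted by 17 = 23 - 6, so the integer part of n/64 lands in the exponent field when combined with the table entry.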
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | scalar-lut2048-p1-div-x2.c |
    67  float vn1 = vz1 * vminus_log2e_x2048 + vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2() local
    80  const uint32_t ve1 = (fp32_to_bits(vn1) & ~vindex_mask) << 12;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
    84  const uint32_t vidx1 = fp32_to_bits(vn1) & vindex_mask;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
    91  vn1 -= vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
    96  float vt1 = vn1 * vln2_o2048_hi + vz1;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
    99  vt1 = vn1 * vln2_o2048_lo + vt1;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
|
D | scalar-lut64-p2-div-x2.c |
    67  float vn1 = vz1 * vminus_log2e_x64 + vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2() local
    80  const uint32_t ve1 = (fp32_to_bits(vn1) & ~vindex_mask) << 17;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
    84  const uint32_t vidx1 = fp32_to_bits(vn1) & vindex_mask;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
    91  vn1 -= vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
    96  float vt1 = vn1 * vln2_o64_hi + vz1;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
    99  vt1 = vn1 * vln2_o64_lo + vt1;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
|
D | scalar-lut2048-p1-div-x4.c |
    71  float vn1 = vz1 * vminus_log2e_x2048 + vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4() local
    86  const uint32_t ve1 = (fp32_to_bits(vn1) & ~vindex_mask) << 12;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
    92  const uint32_t vidx1 = fp32_to_bits(vn1) & vindex_mask;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
    103  vn1 -= vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
    110  float vt1 = vn1 * vln2_o2048_hi + vz1;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
    115  vt1 = vn1 * vln2_o2048_lo + vt1;  in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
|
D | scalar-p5-div-x2.c |
    65  float vn1 = vz1 * vminus_log2e + vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2() local
    70  const float vs1 = fp32_from_bits(fp32_to_bits(vn1) << 23);  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
    74  vn1 -= vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
    79  float vt1 = vn1 * vln2_hi + vz1;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
    82  vt1 = vn1 * vln2_lo + vt1;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
|
D | scalar-lut64-p2-div-x4.c |
    71  float vn1 = vz1 * vminus_log2e_x64 + vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4() local
    86  const uint32_t ve1 = (fp32_to_bits(vn1) & ~vindex_mask) << 17;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
    92  const uint32_t vidx1 = fp32_to_bits(vn1) & vindex_mask;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
    103  vn1 -= vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
    110  float vt1 = vn1 * vln2_o64_hi + vz1;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
    115  vt1 = vn1 * vln2_o64_lo + vt1;  in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
|
D | scalar-p5-div-x4.c |
    69  float vn1 = vz1 * vminus_log2e + vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4() local
    76  const float vs1 = fp32_from_bits(fp32_to_bits(vn1) << 23);  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
    82  vn1 -= vmagic_bias;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
    89  float vt1 = vn1 * vln2_hi + vz1;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
    94  vt1 = vn1 * vln2_lo + vt1;  in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
|
D | avx2-rr1-p5-div-x16.c |
    65  __m256 vn1 = _mm256_fmadd_ps(vz1, vlog2e, vmagic_bias);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16() local
    70  const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn1), 23));  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16()
    74  vn1 = _mm256_sub_ps(vn1, vmagic_bias);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16()
    78  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vz1);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16()
|
D | avx2-rr1-p5-nr1fma-x16.c |
    65  __m256 vn1 = _mm256_fmadd_ps(vz1, vlog2e, vmagic_bias);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16() local
    70  const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn1), 23));  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16()
    74  vn1 = _mm256_sub_ps(vn1, vmagic_bias);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16()
    78  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vz1);  in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16()
|
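The sigmoid kernels apply the same exponential range reduction, but to vz rather than vx. The vz1 names and the "div" suffix suggest the usual numerically safe formulation: evaluate e = exp(z) on z = -|x| so the exponential never overflows, compute e / (e + 1), and reflect the result for positive inputs. A minimal scalar sketch of that structure, with libm's expf() standing in for the kernels' own LUT/polynomial approximation (the function name is illustrative):

#include <math.h>

/* Numerically stable sigmoid: work on z = -|x|, then mirror for x > 0. */
static float sigmoid_ref(float x) {
  const float z = -fabsf(x);    /* z <= 0, so e = exp(z) stays in (0, 1]   */
  const float e = expf(z);
  float f = e / (e + 1.0f);     /* sigmoid(-|x|)                           */
  if (x > 0.0f) {
    f = 1.0f - f;               /* sigmoid(x) = 1 - sigmoid(-x)            */
  }
  return f;
}

The avx2-rr1 entries use a single-constant ln2 reduction (only one vminus_ln2 term appears in the hits), and the nr1fma variant presumably replaces the final division with one Newton-Raphson reciprocal refinement, as its name indicates.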
/external/python/cpython3/Modules/_decimal/libmpdec/ |
D | typearith.h |
    133  vn1, vn0,  in _mpd_div_words() local
    143  vn1 = v >> 32;  in _mpd_div_words()
    153  q1 = un32 / vn1;  in _mpd_div_words()
    154  rhat = un32 - q1*vn1;  in _mpd_div_words()
    158  rhat = rhat + vn1;  in _mpd_div_words()
    189  q0 = un21 / vn1;  in _mpd_div_words()
    190  rhat = un21 - q0*vn1;  in _mpd_div_words()
    194  rhat = rhat + vn1;  in _mpd_div_words()
    390  vn1, vn0,  in _mpd_div_words() local
    400  vn1 = v >> 16;  in _mpd_div_words()
    [all …]
|
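In libmpdec's typearith.h, vn1 and vn0 are the high and low halves of the normalized divisor in _mpd_div_words(); the two hit clusters (lines 143 and 400) appear to be the 64-bit and 32-bit builds of the same routine, which is why one sets vn1 = v >> 32 and the other vn1 = v >> 16. The pattern is Knuth's Algorithm D in the divlu2 formulation from Hacker's Delight. The sketch below is not libmpdec's code: it divides a 64-bit dividend u1:u0 by a 32-bit divisor v using only 32-bit operations, and assumes v != 0 and u1 < v so the quotient fits in one word.

#include <stdint.h>

static uint32_t div_words_sketch(uint32_t *r, uint32_t u1, uint32_t u0, uint32_t v)
{
    const uint32_t b = 1u << 16;   /* half-word base                          */
    uint32_t un32, un21, un10;     /* pieces of the (shifted) dividend        */
    uint32_t vn1, vn0;             /* divisor halves: the names seen above    */
    uint32_t un1, un0;             /* halves of the low dividend word         */
    uint32_t q1, q0, rhat;
    int s = 0;

    /* Normalize: shift left until the divisor's top bit is set. */
    while ((v & 0x80000000u) == 0) {
        v <<= 1;
        s++;
    }
    vn1 = v >> 16;
    vn0 = v & 0xFFFF;

    if (s > 0) {
        un32 = (u1 << s) | (u0 >> (32 - s));
        un10 = u0 << s;
    } else {
        un32 = u1;
        un10 = u0;
    }
    un1 = un10 >> 16;
    un0 = un10 & 0xFFFF;

    /* Estimate and correct the high quotient digit. */
    q1 = un32 / vn1;
    rhat = un32 - q1 * vn1;
    while (q1 >= b || q1 * vn0 > b * rhat + un1) {
        q1--;
        rhat += vn1;
        if (rhat >= b) break;
    }

    un21 = un32 * b + un1 - q1 * v;   /* multiply-and-subtract (mod 2^32 is fine) */

    /* Estimate and correct the low quotient digit. */
    q0 = un21 / vn1;
    rhat = un21 - q0 * vn1;
    while (q0 >= b || q0 * vn0 > b * rhat + un0) {
        q0--;
        rhat += vn1;
        if (rhat >= b) break;
    }

    *r = (un21 * b + un0 - q0 * v) >> s;   /* denormalized remainder */
    return q1 * b + q0;
}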
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x16.c |
    57  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16() local
    62  const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn1), 23));  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
    66  vn1 = _mm256_sub_ps(vn1, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
    71  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
    74  vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_lo, vt1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
|
D | avx2-p5-x24.c |
    59  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24() local
    65  const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn1), 23));  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24()
    70  vn1 = _mm256_sub_ps(vn1, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24()
    76  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24()
    80  vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_lo, vt1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x24()
|
D | avx2-p5-x32.c |
    61  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32() local
    68  const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn1), 23));  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
    74  vn1 = _mm256_sub_ps(vn1, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
    81  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
    86  vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_lo, vt1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x32()
|
D | avx512f-p5-scalef-x32.c |
    53  __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() local
    58  __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_hi, vx1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32()
    61  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32()
    83  __m512 vf1 = _mm512_scalef_ps(vp1, vn1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32()
|
D | avx2-p5-x40.c |
    63  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40() local
    71  const __m256 vs1 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn1), 23));  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
    78  vn1 = _mm256_sub_ps(vn1, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
    86  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
    92  vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_lo, vt1);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x40()
|
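Two exp strategies appear in this directory: the avx2-p5 kernels vectorize the magic-bias/shift-by-23 trick seen in the scalar kernels (_mm256_fmadd_ps plus _mm256_slli_epi32), while the avx512f-p5-scalef kernel avoids the bit manipulation entirely, rounding n with _mm512_roundscale_ps and applying 2^n with _mm512_scalef_ps. The sketch below shows the AVX-512 flavour; the constants are reconstructions, the function name is illustrative, and the Taylor coefficients are placeholders for the kernel's tuned degree-5 minimax polynomial.

#include <immintrin.h>

static __m512 exp_avx512_scalef_sketch(__m512 vx) {
  const __m512 vlog2e        = _mm512_set1_ps(0x1.715476p+0f);
  const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E400p-1f);
  const __m512 vminus_ln2_lo = _mm512_set1_ps(-0x1.7F7D1Cp-20f);

  /* n = round(x * log2(e)); imm8 = 0 selects round-to-nearest-even, no scaling. */
  const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);

  /* t = x - n*ln2, with ln2 split into hi/lo parts for extra precision. */
  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);

  /* Rough e^t on |t| <= ln2/2: Taylor 1 + t + t^2/2 + ... + t^5/120 (Horner). */
  __m512 vp = _mm512_set1_ps(1.0f / 120.0f);
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f / 24.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f / 6.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(0.5f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));
  vp = _mm512_fmadd_ps(vp, vt, _mm512_set1_ps(1.0f));

  /* e^x = 2^n * e^t; scalef saturates cleanly for very large or small n. */
  return _mm512_scalef_ps(vp, vn);
}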
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x32.c |
    50  const __m512 vn1 = _mm512_roundscale_ps(_mm512_mul_ps(vx1, vlog2e), 0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() local
    55  __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_hi, vx1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32()
    58  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32()
    86  const __m512 ve1 = _mm512_add_ps(vn1, vscalee);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32()
|
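The vscaleextexp kernel performs the same roundscale reduction but keeps the exponent separate instead of applying 2^n immediately: ve1 = vn1 + vscalee adds an externally supplied extended exponent to n as a float. A minimal sketch of that combining step; only the vn1 + vscalee addition is visible in the hits above, so vscalev (a companion mantissa factor), the function name, and the final scalef application are assumptions.

#include <immintrin.h>

/* Combine a polynomial result vp with an integer exponent vn and an external
 * (mantissa, exponent) scale, deferring the 2^e application to one scalef. */
static __m512 scaleextexp_combine_sketch(__m512 vp, __m512 vn,
                                         __m512 vscalev, __m512 vscalee) {
  const __m512 vf = _mm512_mul_ps(vp, vscalev);   /* combine mantissas          */
  const __m512 ve = _mm512_add_ps(vn, vscalee);   /* combine exponents as floats */
  return _mm512_scalef_ps(vf, ve);                /* vf * 2^ve in one step       */
}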