/external/llvm-project/llvm/test/CodeGen/X86/ |
D | tailcall-ri64.ll |
    13  %vt = type { i32 (...)** }
    15  define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class*
    16      %this, %vt* %Ty) align 2 {
    18  %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)***
    19  %vtable = load %vt* (%vt*, %class*)**, %vt* (%vt*, %class*)*** %0, align 8
    20  %vfn = getelementptr inbounds %vt* (%vt*, %class*)*, %vt* (%vt*, %class*)** %vtable, i64 4
    21  %1 = load %vt* (%vt*, %class*)*, %vt* (%vt*, %class*)** %vfn, align 8
    22  %call = tail call %vt* %1(%vt* %Ty, %class* %this)
    23  ret %vt* %call
|
/external/llvm/test/CodeGen/X86/ |
D | tailcall-ri64.ll |
    13  %vt = type { i32 (...)** }
    15  define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class*
    16      %this, %vt* %Ty) align 2 {
    18  %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)***
    19  %vtable = load %vt* (%vt*, %class*)**, %vt* (%vt*, %class*)*** %0, align 8
    20  %vfn = getelementptr inbounds %vt* (%vt*, %class*)*, %vt* (%vt*, %class*)** %vtable, i64 4
    21  %1 = load %vt* (%vt*, %class*)*, %vt* (%vt*, %class*)** %vfn, align 8
    22  %call = tail call %vt* %1(%vt* %Ty, %class* %this)
    23  ret %vt* %call
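
The two copies of this test are identical; both exercise a virtual call in return position staying a tail call. Below is a C rendering (a sketch, not the test's own source; all names are invented for illustration) of what the IR does: the first word of the %Ty object is a pointer to a table of function pointers, slot 4 is loaded, and the call is made in tail position with the arguments swapped, which the x86-64 backend can then lower to an indirect jump rather than call+ret.

    #include <stddef.h>

    struct class_;                       /* the "%class* %this" object, opaque here */
    struct vt;                           /* the "%vt" (RecTy-like) object */
    typedef struct vt *(*vfn_t)(struct vt *, struct class_ *);
    struct vt { const vfn_t *vtable; };  /* %vt = type { i32 (...)** } */

    static struct vt *convert(struct class_ *self, struct vt *ty) {
      return ty->vtable[4](ty, self);    /* load slot 4, call in tail position */
    }

    /* Tiny harness so the sketch links and runs. */
    static struct vt *slot4_impl(struct vt *ty, struct class_ *self) {
      (void) self;
      return ty;
    }

    int main(void) {
      const vfn_t table[5] = { NULL, NULL, NULL, NULL, slot4_impl };
      struct vt ty = { table };
      return convert(NULL, &ty) == &ty ? 0 : 1;
    }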
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-x86-rr2-p6-x4.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4():
    55  v128_t vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_hi), vz);
    57  vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_lo), vt);
    59  vt = wasm_v128_andnot(vt, vsatm);
    61  v128_t vp = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt), vc5);
    62  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vc4);
    63  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vc3);
    64  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vc2);
    65  vp = wasm_f32x4_mul(vp, vt);
    67  vt = wasm_f32x4_mul(vt, vs);
    69  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vt);
    [all …]
|
D | velu-sse41-rr2-p6-x4.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x4():
    55  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz);
    56  vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt);
    58  __m128 vp = _mm_add_ps(_mm_mul_ps(vc6, vt), vc5);
    59  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc4);
    60  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc3);
    61  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc2);
    62  vp = _mm_mul_ps(vp, vt);
    64  vt = _mm_mul_ps(vt, vs);
    66  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vt);
    84  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz);
    [all …]
|
D | velu-wasmsimd-arm-rr2-p6-x4.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4():
    55  v128_t vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_hi), vz);
    56  vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_lo), vt);
    58  v128_t vp = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt), vc5);
    59  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vc4);
    60  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vc3);
    61  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vc2);
    62  vp = wasm_f32x4_mul(vp, vt);
    64  vt = wasm_f32x4_mul(vt, vs);
    66  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vt);
    85  v128_t vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_hi), vz);
    [all …]
|
D | velu-neon-rr2-p6-x4.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x4():
    54  float32x4_t vt = vmlaq_f32(vz, vn, vminus_ln2_hi);
    55  vt = vmlaq_f32(vt, vn, vminus_ln2_lo);
    57  float32x4_t vp = vmlaq_f32(vc5, vc6, vt);
    58  vp = vmlaq_f32(vc4, vp, vt);
    59  vp = vmlaq_f32(vc3, vp, vt);
    60  vp = vmlaq_f32(vc2, vp, vt);
    61  vp = vmulq_f32(vp, vt);
    63  vt = vmulq_f32(vt, vs);
    65  vp = vmlaq_f32(vt, vp, vt);
    83  float32x4_t vt = vmlaq_f32(vz, vn, vminus_ln2_hi);
    [all …]
|
D | velu-avx-rr2-p6-x8.c | in xnn_f32_velu_ukernel__avx_rr2_p6_x8():
    55  __m256 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_hi), vz);
    57  vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_lo), vt);
    59  __m256 vp = _mm256_add_ps(_mm256_mul_ps(vc6, vt), vc5);
    60  vp = _mm256_add_ps(_mm256_mul_ps(vp, vt), vc4);
    61  vp = _mm256_add_ps(_mm256_mul_ps(vp, vt), vc3);
    62  vp = _mm256_add_ps(_mm256_mul_ps(vp, vt), vc2);
    63  vp = _mm256_mul_ps(vp, vt);
    65  vt = _mm256_mul_ps(vt, vs);
    67  vp = _mm256_add_ps(_mm256_mul_ps(vp, vt), vt);
    90  __m256 vt = _mm256_add_ps(_mm256_mul_ps(vn, vminus_ln2_hi), vz);
    [all …]
|
D | velu-sse2-rr2-p6-x4.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x4():
    55  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz);
    56  vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt);
    58  __m128 vp = _mm_add_ps(_mm_mul_ps(vc6, vt), vc5);
    59  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc4);
    60  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc3);
    61  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc2);
    62  vp = _mm_mul_ps(vp, vt);
    64  vt = _mm_mul_ps(vt, vs);
    66  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vt);
    85  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz);
    [all …]
|
D | velu-neonfma-rr1-p6-x4.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x4():
    53  float32x4_t vt = vfmaq_f32(vz, vn, vminus_ln2);
    55  float32x4_t vp = vfmaq_f32(vc5, vc6, vt);
    56  vp = vfmaq_f32(vc4, vp, vt);
    57  vp = vfmaq_f32(vc3, vp, vt);
    58  vp = vfmaq_f32(vc2, vp, vt);
    59  vp = vmulq_f32(vp, vt);
    61  vt = vmulq_f32(vt, vs);
    63  vp = vfmaq_f32(vt, vp, vt);
    81  float32x4_t vt = vfmaq_f32(vz, vn, vminus_ln2);
    83  float32x4_t vp = vfmaq_f32(vc5, vc6, vt);
    [all …]
|
D | velu-avx512f-rr1-p6-x16.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x16():
    53  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz);
    55  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
    56  vp = _mm512_fmadd_ps(vp, vt, vc4);
    57  vp = _mm512_fmadd_ps(vp, vt, vc3);
    58  vp = _mm512_fmadd_ps(vp, vt, vc2);
    59  vp = _mm512_mul_ps(vp, vt);
    61  vt = _mm512_mul_ps(vt, vs);
    63  vp = _mm512_fmadd_ps(vp, vt, vt);
    87  __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz);
    89  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
    [all …]
|
D | velu-avx2-rr1-p6-x8.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x8():
    52  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);
    54  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
    55  vp = _mm256_fmadd_ps(vp, vt, vc4);
    56  vp = _mm256_fmadd_ps(vp, vt, vc3);
    57  vp = _mm256_fmadd_ps(vp, vt, vc2);
    58  vp = _mm256_mul_ps(vp, vt);
    60  vt = _mm256_mul_ps(vt, vs);
    62  vp = _mm256_fmadd_ps(vp, vt, vt);
    84  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);
    86  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
    [all …]
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x4.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4():
    66   v128_t vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_hi), vz);
    68   vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_lo), vt);
    70   vt = wasm_v128_andnot(vt, vsatm);
    72   v128_t vp = wasm_f32x4_add(wasm_f32x4_mul(vc3, vt), vc2);
    73   vp = wasm_f32x4_mul(vp, vt);
    75   vt = wasm_f32x4_mul(vt, vs);
    77   vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vt);
    107  v128_t vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_hi), vz);
    109  vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_lo), vt);
    111  vt = wasm_v128_andnot(vt, vsatm);
    [all …]
|
/external/XNNPACK/src/f32-velu/ |
D | scalar-rr2-p6.c.in |
    60  float vt${N} = vn${N} * vminus_ln2_hi + vz${N};
    63  vt${N} = vn${N} * vminus_ln2_lo + vt${N};
    69  vt${N} = 0.0f;
    73  float vp${N} = vc6 * vt${N} + vc5;
    76  vp${N} = vp${N} * vt${N} + vc4;
    79  vp${N} = vp${N} * vt${N} + vc3;
    82  vp${N} = vp${N} * vt${N} + vc2;
    85  vp${N} *= vt${N};
    88  vt${N} *= vs${N};
    92  vp${N} = vp${N} * vt${N} + vt${N};
    [all …]
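
This template is what the generated rr2-p6 ELU kernels above are expanded from. The sketch below spells out one ${N} lane in plain C, under stated assumptions: Taylor coefficients, nearbyintf(), and ldexpf() stand in for the tuned minimax constants and magic-bias exponent tricks of the real template, and the input prescaling, saturation handling (the vt${N} = 0.0f line), and final alpha/beta blend from the elided lines are omitted.

    #include <math.h>
    #include <stdio.h>

    /* Hypothetical scalar expansion: expm1(z) for z <= 0 via two-step (rr2)
     * Cody-Waite reduction and a degree-6 polynomial, mirroring the
     * vt/vp/vs dataflow shown in the hits. */
    static float expm1_rr2_p6_sketch(float vz) {
      const float vminus_ln2_hi = -0x1.62E400p-1f;   /* -high part of ln(2) */
      const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;  /* -low part of ln(2)  */
      /* Taylor stand-ins; the generated kernels embed minimax constants. */
      const float vc6 = 1.0f / 720.0f, vc5 = 1.0f / 120.0f, vc4 = 1.0f / 24.0f;
      const float vc3 = 1.0f / 6.0f, vc2 = 0.5f;

      const float vn = nearbyintf(vz * 0x1.715476p+0f); /* round(z / ln 2) */
      const float vs = ldexpf(1.0f, (int) vn);          /* s = 2^n */

      float vt = vn * vminus_ln2_hi + vz;  /* t = z - n*ln2 (high part) */
      vt = vn * vminus_ln2_lo + vt;        /*                (low part) */

      float vp = vc6 * vt + vc5;           /* Horner, degree 6 */
      vp = vp * vt + vc4;
      vp = vp * vt + vc3;
      vp = vp * vt + vc2;
      vp *= vt;                            /* p = c2*t + ... + c6*t^5 */

      vt *= vs;                            /* t*s */
      vp = vp * vt + vt;                   /* s*t*(1 + c2*t + ... + c6*t^5) */
      return (vs - 1.0f) + vp;             /* expm1(z) = (s - 1) + s*t*poly */
    }

    int main(void) {
      printf("%f vs %f\n", expm1_rr2_p6_sketch(-0.5f), expm1f(-0.5f));
      return 0;
    }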
|
D | wasmsimd-rr2-p6.c.in |
    67  …v128_t vt${ABC[N:N+4]} = wasm_f32x4_add(wasm_f32x4_mul(vn${ABC[N:N+4]}, vminus_ln2_hi), vz${ABC[N:…
    72  … vt${ABC[N:N+4]} = wasm_f32x4_add(wasm_f32x4_mul(vn${ABC[N:N+4]}, vminus_ln2_lo), vt${ABC[N:N+4]});
    78  vt${ABC[N:N+4]} = wasm_v128_andnot(vt${ABC[N:N+4]}, vsatm${ABC[N:N+4]});
    79  v128_t vp${ABC[N:N+4]} = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt${ABC[N:N+4]}), vc5);
    82  vp${ABC[N:N+4]} = wasm_f32x4_add(wasm_f32x4_mul(vp${ABC[N:N+4]}, vt${ABC[N:N+4]}), vc4);
    85  vp${ABC[N:N+4]} = wasm_f32x4_add(wasm_f32x4_mul(vp${ABC[N:N+4]}, vt${ABC[N:N+4]}), vc3);
    88  vp${ABC[N:N+4]} = wasm_f32x4_add(wasm_f32x4_mul(vp${ABC[N:N+4]}, vt${ABC[N:N+4]}), vc2);
    91  vp${ABC[N:N+4]} = wasm_f32x4_mul(vp${ABC[N:N+4]}, vt${ABC[N:N+4]});
    94  vt${ABC[N:N+4]} = wasm_f32x4_mul(vt${ABC[N:N+4]}, vs${ABC[N:N+4]});
    98  …vp${ABC[N:N+4]} = wasm_f32x4_add(wasm_f32x4_mul(vp${ABC[N:N+4]}, vt${ABC[N:N+4]}), vt${ABC[N:N+4]}…
    [all …]
|
D | neon-p6.c.in |
    65  … float32x4_t vt${ABC[N:N+4]} = ${VMULADDQ_F32}(vz${ABC[N:N+4]}, vn${ABC[N:N+4]}, vminus_ln2);
    68  … float32x4_t vt${ABC[N:N+4]} = ${VMULADDQ_F32}(vz${ABC[N:N+4]}, vn${ABC[N:N+4]}, vminus_ln2_hi);
    71  vt${ABC[N:N+4]} = ${VMULADDQ_F32}(vt${ABC[N:N+4]}, vn${ABC[N:N+4]}, vminus_ln2_lo);
    74  float32x4_t vp${ABC[N:N+4]} = ${VMULADDQ_F32}(vc5, vc6, vt${ABC[N:N+4]});
    77  vp${ABC[N:N+4]} = ${VMULADDQ_F32}(vc4, vp${ABC[N:N+4]}, vt${ABC[N:N+4]});
    80  vp${ABC[N:N+4]} = ${VMULADDQ_F32}(vc3, vp${ABC[N:N+4]}, vt${ABC[N:N+4]});
    83  vp${ABC[N:N+4]} = ${VMULADDQ_F32}(vc2, vp${ABC[N:N+4]}, vt${ABC[N:N+4]});
    86  vp${ABC[N:N+4]} = vmulq_f32(vp${ABC[N:N+4]}, vt${ABC[N:N+4]});
    89  vt${ABC[N:N+4]} = vmulq_f32(vt${ABC[N:N+4]}, vs${ABC[N:N+4]});
    93  vp${ABC[N:N+4]} = ${VMULADDQ_F32}(vt${ABC[N:N+4]}, vp${ABC[N:N+4]}, vt${ABC[N:N+4]});
    [all …]
|
/external/lzma/CPP/Windows/ |
D | PropVariant.cpp |
    31   p->vt = VT_ERROR;   (in PropVarEm_Alloc_Bstr)
    35   p->vt = VT_BSTR;    (in PropVarEm_Alloc_Bstr)
    44   p->vt = VT_BSTR;    (in PropVarEm_Set_Str)
    47   p->vt = VT_ERROR;   (in PropVarEm_Set_Str)
    54   vt = VT_EMPTY;      (in CPropVariant)
    60   vt = VT_EMPTY;      (in CPropVariant)
    66   vt = VT_EMPTY;      (in CPropVariant)
    72   vt = VT_EMPTY;      (in CPropVariant)
    99   vt = VT_BSTR;       (in operator =)
    114  vt = VT_BSTR;       (in operator =)
    [all …]
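
These hits show the discriminated-union discipline around vt: every constructor and assignment path writes the tag before (or instead of) the payload, and failure paths park the object in VT_ERROR. A minimal C sketch of that pattern, with stand-in types and enum values rather than the real Windows PROPVARIANT/BSTR definitions:

    #include <stdio.h>
    #include <wchar.h>

    /* Stand-ins for the Windows VARTYPE tags used in the hits above. */
    typedef enum { VT_EMPTY = 0, VT_BSTR = 8, VT_ERROR = 10 } VARTYPE;

    typedef struct {
      VARTYPE vt;               /* discriminant: which member below is live */
      union {
        const wchar_t *bstrVal; /* valid only when vt == VT_BSTR  */
        long scode;             /* valid only when vt == VT_ERROR */
      };
    } PropVariantSketch;

    static void set_str(PropVariantSketch *p, const wchar_t *s) {
      if (s != NULL) {
        p->vt = VT_BSTR;        /* tag first, as PropVarEm_Set_Str does */
        p->bstrVal = s;
      } else {
        p->vt = VT_ERROR;       /* failure path: tag says nothing is valid */
      }
    }

    int main(void) {
      PropVariantSketch p = { VT_EMPTY };   /* CPropVariant() starts empty */
      set_str(&p, L"hello");
      if (p.vt == VT_BSTR) wprintf(L"%ls\n", p.bstrVal);
      return 0;
    }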
|
/external/XNNPACK/src/f32-sigmoid/ |
D | scalar-p5-div.c.in |
    56   float vt${N} = vn${N} * vln2_hi + vz${N};
    59   vt${N} = vn${N} * vln2_lo + vt${N};
    62   float vp${N} = vt${N} * vc5 + vc4;
    65   vp${N} = vt${N} * vp${N} + vc3;
    68   vp${N} = vt${N} * vp${N} + vc2;
    71   vp${N} = vt${N} * vp${N} + vc1;
    74   vt${N} *= vs${N};
    77   const float ve${N} = vt${N} * vp${N} + vs${N};
    109  float vt = vn * vln2_hi + vz;
    110  vt = vn * vln2_lo + vt;
    [all …]
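
This template carries the exp() reconstruction used by the generated p5-div sigmoid kernels listed below. The sketch expands it for one lane, with the assumptions called out in comments: Taylor coefficients replace the tuned constants, the sign conventions are simplified relative to the template, and the final e/(e + 1) division plus sign reflection (elided by [all …]) is written out as the standard construction the "-div" variants are named after.

    #include <math.h>
    #include <stdio.h>

    /* Hypothetical scalar expansion of the p5-div sigmoid dataflow above. */
    static float sigmoid_p5_div_sketch(float vx) {
      const float vminus_ln2_hi = -0x1.62E400p-1f;
      const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;
      /* Taylor stand-ins for the kernel's minimax coefficients. */
      const float vc5 = 1.0f / 120.0f, vc4 = 1.0f / 24.0f, vc3 = 1.0f / 6.0f;
      const float vc2 = 0.5f, vc1 = 1.0f;

      const float vz = -fabsf(vx);                       /* work with exp(-|x|) */
      const float vn = nearbyintf(vz * 0x1.715476p+0f);  /* round(z / ln 2) */
      const float vs = ldexpf(1.0f, (int) vn);           /* s = 2^n */

      float vt = vn * vminus_ln2_hi + vz;  /* t = z - n*ln2, two-step reduction */
      vt = vn * vminus_ln2_lo + vt;

      float vp = vt * vc5 + vc4;           /* Horner, degree 5 */
      vp = vt * vp + vc3;
      vp = vt * vp + vc2;
      vp = vt * vp + vc1;

      vt *= vs;
      const float ve = vt * vp + vs;       /* e = s*(1 + t*p) ~= exp(-|x|) */

      const float vf = ve / (ve + 1.0f);   /* sigmoid(-|x|) */
      return vx > 0.0f ? 1.0f - vf : vf;   /* reflect for positive inputs */
    }

    int main(void) {
      printf("%f vs %f\n", sigmoid_p5_div_sketch(1.5f), 1.0f / (1.0f + expf(-1.5f)));
      return 0;
    }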
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | wasmsimd-p5-div-x4.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4():
    48  v128_t vt = wasm_f32x4_add(vz, wasm_f32x4_mul(vn, vln2_hi));
    49  vt = wasm_f32x4_add(vt, wasm_f32x4_mul(vn, vln2_lo));
    51  v128_t vp = wasm_f32x4_add(vc4, wasm_f32x4_mul(vt, vc5));
    52  vp = wasm_f32x4_add(vc3, wasm_f32x4_mul(vt, vp));
    53  vp = wasm_f32x4_add(vc2, wasm_f32x4_mul(vt, vp));
    54  vp = wasm_f32x4_add(vc1, wasm_f32x4_mul(vt, vp));
    56  vt = wasm_f32x4_mul(vt, vs);
    57  const v128_t ve = wasm_f32x4_add(vs, wasm_f32x4_mul(vt, vp));
    76  v128_t vt = wasm_f32x4_add(vz, wasm_f32x4_mul(vn, vln2_hi));
    77  vt = wasm_f32x4_add(vt, wasm_f32x4_mul(vn, vln2_lo));
    [all …]
|
D | sse41-p5-div-x4.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4():
    48  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz);
    49  vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt);
    51  __m128 vp = _mm_add_ps(_mm_mul_ps(vc5, vt), vc4);
    52  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc3);
    53  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc2);
    54  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc1);
    56  vt = _mm_mul_ps(vt, vs);
    57  __m128 ve = _mm_add_ps(_mm_mul_ps(vt, vp), vs);
    79  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz);
    80  vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt);
    [all …]
|
D | sse2-p5-div-x4.c | in xnn_f32_sigmoid_ukernel__sse2_p5_div_x4():
    48  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz);
    49  vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt);
    51  __m128 vp = _mm_add_ps(_mm_mul_ps(vc5, vt), vc4);
    52  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc3);
    53  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc2);
    54  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc1);
    56  vt = _mm_mul_ps(vt, vs);
    57  __m128 ve = _mm_add_ps(_mm_mul_ps(vt, vp), vs);
    80  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vz);
    81  vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt);
    [all …]
|
D | neon-rr2-p5-nr2recps-x4.c | in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x4():
    46  float32x4_t vt = vmlaq_f32(vz, vn, vln2_hi);
    47  vt = vmlaq_f32(vt, vn, vln2_lo);
    49  float32x4_t vp = vmlaq_f32(vc4, vc5, vt);
    50  vp = vmlaq_f32(vc3, vp, vt);
    51  vp = vmlaq_f32(vc2, vp, vt);
    52  vp = vmlaq_f32(vc1, vp, vt);
    54  vt = vmulq_f32(vt, vs);
    55  const float32x4_t ve = vmlaq_f32(vs, vp, vt);
    77  float32x4_t vt = vmlaq_f32(vz, vn, vln2_hi);
    78  vt = vmlaq_f32(vt, vn, vln2_lo);
    [all …]
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-p5-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4():
    67   float32x4_t vt = vfmaq_f32(vx, vn, vminus_ln2_hi);
    68   vt = vfmaq_f32(vt, vn, vminus_ln2_lo);
    71   float32x4_t vp = vfmaq_f32(vc4, vc5, vt);
    72   vp = vfmaq_f32(vc3, vp, vt);
    73   vp = vfmaq_f32(vc2, vp, vt);
    74   vp = vfmaq_f32(vc1, vp, vt);
    80   vt = vmulq_f32(vt, vs);
    81   float32x4_t vf = vfmaq_f32(vs, vp, vt);
    124  float32x4_t vt = vfmaq_f32(vx, vn, vminus_ln2_hi);
    125  vt = vfmaq_f32(vt, vn, vminus_ln2_lo);
    [all …]
|
D | neon-p5-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x4():
    68   float32x4_t vt = vmlaq_f32(vx, vn, vminus_ln2_hi);
    69   vt = vmlaq_f32(vt, vn, vminus_ln2_lo);
    72   float32x4_t vp = vmlaq_f32(vc4, vc5, vt);
    73   vp = vmlaq_f32(vc3, vp, vt);
    74   vp = vmlaq_f32(vc2, vp, vt);
    75   vp = vmlaq_f32(vc1, vp, vt);
    81   vt = vmulq_f32(vt, vs);
    82   float32x4_t vf = vmlaq_f32(vs, vp, vt);
    125  float32x4_t vt = vmlaq_f32(vx, vn, vminus_ln2_hi);
    126  vt = vmlaq_f32(vt, vn, vminus_ln2_lo);
    [all …]
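
The raddstoreexpminusmax kernels combine three things per element: compute exp(x - max) with the same n/s/t/p5 decomposition shown above, store it, and accumulate the running sum a later softmax pass divides by. A scalar sketch under the same assumptions as the earlier ones (Taylor coefficients, libm helpers in place of the magic-bias tricks, no denormal-cutoff guard):

    #include <math.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Compute out[i] = exp(in[i] - vmax) for all i and return the sum. */
    static float raddstoreexpminusmax_sketch(const float *in, float *out,
                                             size_t n, float vmax) {
      const float vc5 = 1.0f / 120.0f, vc4 = 1.0f / 24.0f, vc3 = 1.0f / 6.0f;
      const float vc2 = 0.5f, vc1 = 1.0f;
      float vacc = 0.0f;
      for (size_t i = 0; i < n; i++) {
        const float vx = in[i] - vmax;                    /* x - max <= 0 */
        const float vn = nearbyintf(vx * 0x1.715476p+0f); /* round(x / ln 2) */
        const float vs = ldexpf(1.0f, (int) vn);          /* s = 2^n */
        float vt = vn * -0x1.62E400p-1f + vx;             /* Cody-Waite, high */
        vt = vn * -0x1.7F7D1Cp-20f + vt;                  /*             low  */
        float vp = vc5 * vt + vc4;                        /* Horner, degree 5 */
        vp = vp * vt + vc3;
        vp = vp * vt + vc2;
        vp = vp * vt + vc1;
        vt *= vs;
        const float vf = vp * vt + vs;                    /* exp(x - max) */
        out[i] = vf;                                      /* store */
        vacc += vf;                                       /* radd: reduce-add */
      }
      return vacc;
    }

    int main(void) {
      const float x[4] = { 0.5f, 1.0f, -2.0f, 3.0f };
      float e[4];
      const float sum = raddstoreexpminusmax_sketch(x, e, 4, 3.0f /* max */);
      for (int i = 0; i < 4; i++) printf("%f ", e[i] / sum);  /* softmax */
      printf("\n");
      return 0;
    }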
|
/external/XNNPACK/src/math/ |
D | expm1minus-scalar-rr2-p6.c | in xnn_math_f32_expm1minus__scalar_rr2_p6():
    60  float vt = vn * vminus_ln2_hi + vx;
    61  vt = vn * vminus_ln2_lo + vt;
    67  vt = 0.0f;
    73  float vp = vc6 * vt + vc5;
    74  vp = vp * vt + vc4;
    75  vp = vp * vt + vc3;
    76  vp = vp * vt + vc2;
    77  vp *= vt;
    83  vt *= vs;
    85  vp = vp * vt + vt;
|
D | expm1minus-wasmsimd-rr2-p6-andnot.c | in xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot():
    62  v128_t vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_hi), vx);
    63  vt = wasm_f32x4_add(wasm_f32x4_mul(vn, vminus_ln2_lo), vt);
    69  vt = wasm_v128_andnot(vt, vm);
    74  v128_t vp = wasm_f32x4_add(wasm_f32x4_mul(vc6, vt), vc5);
    75  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vc4);
    76  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vc3);
    77  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vc2);
    78  vp = wasm_f32x4_mul(vp, vt);
    84  vt = wasm_f32x4_mul(vt, vs);
    86  vp = wasm_f32x4_add(wasm_f32x4_mul(vp, vt), vt);
|