/external/XNNPACK/src/f32-velu/gen/
D | velu-wasm-rr2-p6-x6.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x6():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
     94  float vp0 = vc6 * vt0 + vc5;
     95  float vp1 = vc6 * vt1 + vc5;
     96  float vp2 = vc6 * vt2 + vc5;
     97  float vp3 = vc6 * vt3 + vc5;
     98  float vp4 = vc6 * vt4 + vc5;
     99  float vp5 = vc6 * vt5 + vc5;
    191  float vp = vc6 * vt + vc5;
|
D | velu-wasm-rr2-p6-x5.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x5():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
     87  float vp0 = vc6 * vt0 + vc5;
     88  float vp1 = vc6 * vt1 + vc5;
     89  float vp2 = vc6 * vt2 + vc5;
     90  float vp3 = vc6 * vt3 + vc5;
     91  float vp4 = vc6 * vt4 + vc5;
    172  float vp = vc6 * vt + vc5;
|
D | velu-scalar-rr2-p6-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x6():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
    118  float vp0 = vc6 * vt0 + vc5;
    119  float vp1 = vc6 * vt1 + vc5;
    120  float vp2 = vc6 * vt2 + vc5;
    121  float vp3 = vc6 * vt3 + vc5;
    122  float vp4 = vc6 * vt4 + vc5;
    123  float vp5 = vc6 * vt5 + vc5;
    231  float vp = vc6 * vt + vc5;
|
D | velu-wasm-rr2-p6-x4.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x4():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
     80  float vp0 = vc6 * vt0 + vc5;
     81  float vp1 = vc6 * vt1 + vc5;
     82  float vp2 = vc6 * vt2 + vc5;
     83  float vp3 = vc6 * vt3 + vc5;
    153  float vp = vc6 * vt + vc5;
|
D | velu-scalar-rr2-p6-x5.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x5():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
    107  float vp0 = vc6 * vt0 + vc5;
    108  float vp1 = vc6 * vt1 + vc5;
    109  float vp2 = vc6 * vt2 + vc5;
    110  float vp3 = vc6 * vt3 + vc5;
    111  float vp4 = vc6 * vt4 + vc5;
    206  float vp = vc6 * vt + vc5;
|
D | velu-scalar-rr2-p6-x4.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x4():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
     96  float vp0 = vc6 * vt0 + vc5;
     97  float vp1 = vc6 * vt1 + vc5;
     98  float vp2 = vc6 * vt2 + vc5;
     99  float vp3 = vc6 * vt3 + vc5;
    181  float vp = vc6 * vt + vc5;
|
D | velu-scalar-rr2-p6-x3.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x3():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
     85  float vp0 = vc6 * vt0 + vc5;
     86  float vp1 = vc6 * vt1 + vc5;
     87  float vp2 = vc6 * vt2 + vc5;
    156  float vp = vc6 * vt + vc5;
|
D | velu-wasm-rr2-p6-x3.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x3():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
     73  float vp0 = vc6 * vt0 + vc5;
     74  float vp1 = vc6 * vt1 + vc5;
     75  float vp2 = vc6 * vt2 + vc5;
    134  float vp = vc6 * vt + vc5;
|
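All of the scalar and wasm hits above are the same operation: the opening step of a Horner evaluation of the degree-6 polynomial these ELU kernels use to approximate exp on the range-reduced argument vt. For orientation, a minimal scalar sketch follows. Only vc6 (0x1.6b7338p-10f, roughly 1/720) is taken from the hits; vc5 through vc2 are illustrative Taylor-series stand-ins rather than the fitted constants in the generated kernels, and elu_p6_poly is a made-up helper name.

/*
 * Minimal sketch (not XNNPACK code): the Horner chain whose first step
 * appears in every hit above. vc6 matches the listing; vc5..vc2 are
 * Taylor-coefficient stand-ins for illustration only.
 */
static float elu_p6_poly(float vt) {
  const float vc6 = 0x1.6b7338p-10f;  /* ~1/720, from the hits above */
  const float vc5 = 0x1.111112p-7f;   /* ~1/120, stand-in */
  const float vc4 = 0x1.555556p-5f;   /* ~1/24,  stand-in */
  const float vc3 = 0x1.555556p-3f;   /* ~1/6,   stand-in */
  const float vc2 = 0x1.000000p-1f;   /* ~1/2,   stand-in */

  float vp = vc6 * vt + vc5;  /* the line the search matched */
  vp = vp * vt + vc4;
  vp = vp * vt + vc3;
  vp = vp * vt + vc2;
  return vp;  /* the generated kernels keep folding vp into the exp
                 reconstruction from here on */
}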
D | velu-avx2-rr1-p6-x72.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x72():
     36  const __m256 vc6 = _mm256_set1_ps(0x1.6b7338p-10f);  (local)
    103  __m256 vp0 = _mm256_fmadd_ps(vc6, vt0, vc5);
    104  __m256 vp1 = _mm256_fmadd_ps(vc6, vt1, vc5);
    105  __m256 vp2 = _mm256_fmadd_ps(vc6, vt2, vc5);
    106  __m256 vp3 = _mm256_fmadd_ps(vc6, vt3, vc5);
    107  __m256 vp4 = _mm256_fmadd_ps(vc6, vt4, vc5);
    108  __m256 vp5 = _mm256_fmadd_ps(vc6, vt5, vc5);
    109  __m256 vp6 = _mm256_fmadd_ps(vc6, vt6, vc5);
    110  __m256 vp7 = _mm256_fmadd_ps(vc6, vt7, vc5);
    111  __m256 vp8 = _mm256_fmadd_ps(vc6, vt8, vc5);
    [all …]
|
D | velu-avx2-rr1-p6-x80.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x80():
     36  const __m256 vc6 = _mm256_set1_ps(0x1.6b7338p-10f);  (local)
    109  __m256 vp0 = _mm256_fmadd_ps(vc6, vt0, vc5);
    110  __m256 vp1 = _mm256_fmadd_ps(vc6, vt1, vc5);
    111  __m256 vp2 = _mm256_fmadd_ps(vc6, vt2, vc5);
    112  __m256 vp3 = _mm256_fmadd_ps(vc6, vt3, vc5);
    113  __m256 vp4 = _mm256_fmadd_ps(vc6, vt4, vc5);
    114  __m256 vp5 = _mm256_fmadd_ps(vc6, vt5, vc5);
    115  __m256 vp6 = _mm256_fmadd_ps(vc6, vt6, vc5);
    116  __m256 vp7 = _mm256_fmadd_ps(vc6, vt7, vc5);
    117  __m256 vp8 = _mm256_fmadd_ps(vc6, vt8, vc5);
    [all …]
|
D | velu-avx2-rr1-p6-x64.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x64():
     36  const __m256 vc6 = _mm256_set1_ps(0x1.6b7338p-10f);  (local)
     97  __m256 vp0 = _mm256_fmadd_ps(vc6, vt0, vc5);
     98  __m256 vp1 = _mm256_fmadd_ps(vc6, vt1, vc5);
     99  __m256 vp2 = _mm256_fmadd_ps(vc6, vt2, vc5);
    100  __m256 vp3 = _mm256_fmadd_ps(vc6, vt3, vc5);
    101  __m256 vp4 = _mm256_fmadd_ps(vc6, vt4, vc5);
    102  __m256 vp5 = _mm256_fmadd_ps(vc6, vt5, vc5);
    103  __m256 vp6 = _mm256_fmadd_ps(vc6, vt6, vc5);
    104  __m256 vp7 = _mm256_fmadd_ps(vc6, vt7, vc5);
    215  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
    [all …]
|
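The avx2 entries differ from the scalar ones only in register width and unroll factor: _mm256_fmadd_ps(vc6, vt, vc5) fuses the multiply and add of that same first Horner step into one instruction over 8 floats. A hedged one-step sketch (elu_p6_first_step_avx2 is a hypothetical helper name, vc5 is again a stand-in; compile with FMA support, e.g. -mfma):

#include <immintrin.h>

/* One AVX2 Horner step as in the hits: vc6 * vt + vc5, fused. */
static inline __m256 elu_p6_first_step_avx2(__m256 vt) {
  const __m256 vc6 = _mm256_set1_ps(0x1.6b7338p-10f);  /* from the hits */
  const __m256 vc5 = _mm256_set1_ps(0x1.111112p-7f);   /* stand-in */
  return _mm256_fmadd_ps(vc6, vt, vc5);
}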
D | velu-avx512f-rr1-p6-x128.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128():
     36  const __m512 vc6 = _mm512_set1_ps(0x1.6b7338p-10f);  (local)
     97  __m512 vp0 = _mm512_fmadd_ps(vc6, vt0, vc5);
     98  __m512 vp1 = _mm512_fmadd_ps(vc6, vt1, vc5);
     99  __m512 vp2 = _mm512_fmadd_ps(vc6, vt2, vc5);
    100  __m512 vp3 = _mm512_fmadd_ps(vc6, vt3, vc5);
    101  __m512 vp4 = _mm512_fmadd_ps(vc6, vt4, vc5);
    102  __m512 vp5 = _mm512_fmadd_ps(vc6, vt5, vc5);
    103  __m512 vp6 = _mm512_fmadd_ps(vc6, vt6, vc5);
    104  __m512 vp7 = _mm512_fmadd_ps(vc6, vt7, vc5);
    218  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
    [all …]
|
D | velu-avx512f-rr1-p6-x96.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x96():
     36  const __m512 vc6 = _mm512_set1_ps(0x1.6b7338p-10f);  (local)
     85  __m512 vp0 = _mm512_fmadd_ps(vc6, vt0, vc5);
     86  __m512 vp1 = _mm512_fmadd_ps(vc6, vt1, vc5);
     87  __m512 vp2 = _mm512_fmadd_ps(vc6, vt2, vc5);
     88  __m512 vp3 = _mm512_fmadd_ps(vc6, vt3, vc5);
     89  __m512 vp4 = _mm512_fmadd_ps(vc6, vt4, vc5);
     90  __m512 vp5 = _mm512_fmadd_ps(vc6, vt5, vc5);
    182  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
    216  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
|
D | velu-avx2-rr1-p6-x48.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x48():
     36  const __m256 vc6 = _mm256_set1_ps(0x1.6b7338p-10f);  (local)
     85  __m256 vp0 = _mm256_fmadd_ps(vc6, vt0, vc5);
     86  __m256 vp1 = _mm256_fmadd_ps(vc6, vt1, vc5);
     87  __m256 vp2 = _mm256_fmadd_ps(vc6, vt2, vc5);
     88  __m256 vp3 = _mm256_fmadd_ps(vc6, vt3, vc5);
     89  __m256 vp4 = _mm256_fmadd_ps(vc6, vt4, vc5);
     90  __m256 vp5 = _mm256_fmadd_ps(vc6, vt5, vc5);
    179  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
    211  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
|
D | velu-avx512f-rr1-p6-x112.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112():
     36  const __m512 vc6 = _mm512_set1_ps(0x1.6b7338p-10f);  (local)
     91  __m512 vp0 = _mm512_fmadd_ps(vc6, vt0, vc5);
     92  __m512 vp1 = _mm512_fmadd_ps(vc6, vt1, vc5);
     93  __m512 vp2 = _mm512_fmadd_ps(vc6, vt2, vc5);
     94  __m512 vp3 = _mm512_fmadd_ps(vc6, vt3, vc5);
     95  __m512 vp4 = _mm512_fmadd_ps(vc6, vt4, vc5);
     96  __m512 vp5 = _mm512_fmadd_ps(vc6, vt5, vc5);
     97  __m512 vp6 = _mm512_fmadd_ps(vc6, vt6, vc5);
    200  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
    234  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
|
D | velu-avx2-rr1-p6-x56.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x56():
     36  const __m256 vc6 = _mm256_set1_ps(0x1.6b7338p-10f);  (local)
     91  __m256 vp0 = _mm256_fmadd_ps(vc6, vt0, vc5);
     92  __m256 vp1 = _mm256_fmadd_ps(vc6, vt1, vc5);
     93  __m256 vp2 = _mm256_fmadd_ps(vc6, vt2, vc5);
     94  __m256 vp3 = _mm256_fmadd_ps(vc6, vt3, vc5);
     95  __m256 vp4 = _mm256_fmadd_ps(vc6, vt4, vc5);
     96  __m256 vp5 = _mm256_fmadd_ps(vc6, vt5, vc5);
     97  __m256 vp6 = _mm256_fmadd_ps(vc6, vt6, vc5);
    197  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
    229  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
|
D | velu-wasm-rr2-p6-x2.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x2():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
     66  float vp0 = vc6 * vt0 + vc5;
     67  float vp1 = vc6 * vt1 + vc5;
    114  float vp = vc6 * vt + vc5;
|
D | velu-scalar-rr2-p6-x2.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x2():
     36  const float vc6 = 0x1.6b7338p-10f;  (local)
     74  float vp0 = vc6 * vt0 + vc5;
     75  float vp1 = vc6 * vt1 + vc5;
    130  float vp = vc6 * vt + vc5;
|
D | velu-avx2-rr1-p6-x32.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x32():
     36  const __m256 vc6 = _mm256_set1_ps(0x1.6b7338p-10f);  (local)
     73  __m256 vp0 = _mm256_fmadd_ps(vc6, vt0, vc5);
     74  __m256 vp1 = _mm256_fmadd_ps(vc6, vt1, vc5);
     75  __m256 vp2 = _mm256_fmadd_ps(vc6, vt2, vc5);
     76  __m256 vp3 = _mm256_fmadd_ps(vc6, vt3, vc5);
    143  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
    175  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
|
D | velu-avx512f-rr1-p6-x64.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x64():
     36  const __m512 vc6 = _mm512_set1_ps(0x1.6b7338p-10f);  (local)
     73  __m512 vp0 = _mm512_fmadd_ps(vc6, vt0, vc5);
     74  __m512 vp1 = _mm512_fmadd_ps(vc6, vt1, vc5);
     75  __m512 vp2 = _mm512_fmadd_ps(vc6, vt2, vc5);
     76  __m512 vp3 = _mm512_fmadd_ps(vc6, vt3, vc5);
    146  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
    180  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
|
D | velu-neonfma-rr1-p6-x20.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20():
     37  const float32x4_t vc6 = vmovq_n_f32(0x1.6b7338p-10f);  (local)
     80  float32x4_t vp0123 = vfmaq_f32(vc5, vc6, vt0123);
     81  float32x4_t vp4567 = vfmaq_f32(vc5, vc6, vt4567);
     82  float32x4_t vp89AB = vfmaq_f32(vc5, vc6, vt89AB);
     83  float32x4_t vpCDEF = vfmaq_f32(vc5, vc6, vtCDEF);
     84  float32x4_t vpGHIJ = vfmaq_f32(vc5, vc6, vtGHIJ);
    167  float32x4_t vp = vfmaq_f32(vc5, vc6, vt);
    195  float32x4_t vp = vfmaq_f32(vc5, vc6, vt);
|
D | velu-avx512f-rr1-p6-x80.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x80():
     36  const __m512 vc6 = _mm512_set1_ps(0x1.6b7338p-10f);  (local)
     79  __m512 vp0 = _mm512_fmadd_ps(vc6, vt0, vc5);
     80  __m512 vp1 = _mm512_fmadd_ps(vc6, vt1, vc5);
     81  __m512 vp2 = _mm512_fmadd_ps(vc6, vt2, vc5);
     82  __m512 vp3 = _mm512_fmadd_ps(vc6, vt3, vc5);
     83  __m512 vp4 = _mm512_fmadd_ps(vc6, vt4, vc5);
    164  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
    198  __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
|
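The avx512f entries are the same step at twice the width: _mm512_fmadd_ps over 16 floats, which is why the x64..x128 unrolls need half as many registers per element as their AVX2 counterparts. A sketch under the same assumptions as above (hypothetical helper name, stand-in vc5; requires AVX-512F):

#include <immintrin.h>

/* One AVX-512 Horner step as in the hits: vc6 * vt + vc5, fused, 16 lanes. */
static inline __m512 elu_p6_first_step_avx512(__m512 vt) {
  const __m512 vc6 = _mm512_set1_ps(0x1.6b7338p-10f);  /* from the hits */
  const __m512 vc5 = _mm512_set1_ps(0x1.111112p-7f);   /* stand-in */
  return _mm512_fmadd_ps(vc6, vt, vc5);
}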
D | velu-avx2-rr1-p6-x40.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x40():
     36  const __m256 vc6 = _mm256_set1_ps(0x1.6b7338p-10f);  (local)
     79  __m256 vp0 = _mm256_fmadd_ps(vc6, vt0, vc5);
     80  __m256 vp1 = _mm256_fmadd_ps(vc6, vt1, vc5);
     81  __m256 vp2 = _mm256_fmadd_ps(vc6, vt2, vc5);
     82  __m256 vp3 = _mm256_fmadd_ps(vc6, vt3, vc5);
     83  __m256 vp4 = _mm256_fmadd_ps(vc6, vt4, vc5);
    161  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
    193  __m256 vp = _mm256_fmadd_ps(vc6, vt, vc5);
|
D | velu-neon-rr2-p6-x24.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x24():
     38  const float32x4_t vc6 = vmovq_n_f32(0x1.6b7338p-10f);  (local)
     94  float32x4_t vp0123 = vmlaq_f32(vc5, vc6, vt0123);
     95  float32x4_t vp4567 = vmlaq_f32(vc5, vc6, vt4567);
     96  float32x4_t vp89AB = vmlaq_f32(vc5, vc6, vt89AB);
     97  float32x4_t vpCDEF = vmlaq_f32(vc5, vc6, vtCDEF);
     98  float32x4_t vpGHIJ = vmlaq_f32(vc5, vc6, vtGHIJ);
     99  float32x4_t vpKLMN = vmlaq_f32(vc5, vc6, vtKLMN);
    195  float32x4_t vp = vmlaq_f32(vc5, vc6, vt);
    224  float32x4_t vp = vmlaq_f32(vc5, vc6, vt);
|
D | velu-neonfma-rr1-p6-x24.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24():
     37  const float32x4_t vc6 = vmovq_n_f32(0x1.6b7338p-10f);  (local)
     86  float32x4_t vp0123 = vfmaq_f32(vc5, vc6, vt0123);
     87  float32x4_t vp4567 = vfmaq_f32(vc5, vc6, vt4567);
     88  float32x4_t vp89AB = vfmaq_f32(vc5, vc6, vt89AB);
     89  float32x4_t vpCDEF = vfmaq_f32(vc5, vc6, vtCDEF);
     90  float32x4_t vpGHIJ = vfmaq_f32(vc5, vc6, vtGHIJ);
     91  float32x4_t vpKLMN = vfmaq_f32(vc5, vc6, vtKLMN);
    186  float32x4_t vp = vfmaq_f32(vc5, vc6, vt);
    214  float32x4_t vp = vfmaq_f32(vc5, vc6, vt);
|
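The NEON entries show the one real variation in the listing: operand order and fusion. The NEON multiply-accumulate intrinsics take the addend first, so vfmaq_f32(vc5, vc6, vt) and vmlaq_f32(vc5, vc6, vt) both compute vc5 + vc6*vt; the neonfma kernels get a fused operation, while the plain-neon vmlaq_f32 may lower to a separate multiply and add with two roundings. A combined sketch (hypothetical helper name, stand-in vc5):

#include <arm_neon.h>

/* Same first Horner step on NEON; the addend comes first in both intrinsics. */
static inline float32x4_t elu_p6_first_step_neon(float32x4_t vt) {
  const float32x4_t vc6 = vmovq_n_f32(0x1.6b7338p-10f);  /* from the hits */
  const float32x4_t vc5 = vmovq_n_f32(0x1.111112p-7f);   /* stand-in */
#if defined(__ARM_FEATURE_FMA)
  return vfmaq_f32(vc5, vc6, vt);  /* fused, one rounding (neonfma kernels) */
#else
  return vmlaq_f32(vc5, vc6, vt);  /* mul + add, may round twice (neon kernels) */
#endif
}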