/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-lut64-p2-div-x24.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 78 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 79 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 80 …128i vl01 = _mm_insert_epi32(vl0, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 81 …128i vl23 = _mm_insert_epi32(vl2, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 85 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 86 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 87 …128i vl45 = _mm_insert_epi32(vl4, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 88 …128i vl67 = _mm_insert_epi32(vl6, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 92 …__m128i vl8 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() [all …]
|
D | sse41-lut64-p2-div-x16.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 68 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 69 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 70 …128i vl01 = _mm_insert_epi32(vl0, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 71 …128i vl23 = _mm_insert_epi32(vl2, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 75 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 76 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 77 …128i vl45 = _mm_insert_epi32(vl4, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 78 …128i vl67 = _mm_insert_epi32(vl6, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 82 …__m128i vl8 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() [all …]
|
D | sse41-lut64-p2-div-x20.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 73 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 74 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 75 …128i vl01 = _mm_insert_epi32(vl0, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 76 …128i vl23 = _mm_insert_epi32(vl2, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 80 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 81 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 82 …128i vl45 = _mm_insert_epi32(vl4, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 83 …128i vl67 = _mm_insert_epi32(vl6, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 87 …__m128i vl8 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() [all …]
|
D | sse41-lut64-p2-div-x12.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 63 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 64 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 65 …128i vl01 = _mm_insert_epi32(vl0, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 66 …128i vl23 = _mm_insert_epi32(vl2, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 70 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 71 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 72 …128i vl45 = _mm_insert_epi32(vl4, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 73 …128i vl67 = _mm_insert_epi32(vl6, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 77 …__m128i vl8 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() [all …]
|
D | sse41-lut64-p2-div-x8.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 58 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 59 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 60 …128i vl01 = _mm_insert_epi32(vl0, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 61 …128i vl23 = _mm_insert_epi32(vl2, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 65 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 66 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 67 …128i vl45 = _mm_insert_epi32(vl4, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 68 …128i vl67 = _mm_insert_epi32(vl6, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 75 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() [all …]
|
D | sse2-lut64-p2-div-x20.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 73 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 74 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 75 …t __m128i vl1 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 77 …t __m128i vl3 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 82 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 83 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 84 …t __m128i vl5 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 86 …t __m128i vl7 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 91 …__m128i vl8 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() [all …]
|
D | sse2-lut64-p2-div-x24.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 78 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 79 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 80 …t __m128i vl1 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 82 …t __m128i vl3 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 87 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 88 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 89 …t __m128i vl5 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 91 …t __m128i vl7 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 96 …__m128i vl8 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() [all …]
|
D | sse2-lut64-p2-div-x12.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 63 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 64 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 65 …t __m128i vl1 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 67 …t __m128i vl3 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 72 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 73 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 74 …t __m128i vl5 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 76 …t __m128i vl7 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 81 …__m128i vl8 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() [all …]
|
D | sse2-lut64-p2-div-x16.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 68 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 69 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 70 …t __m128i vl1 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 72 …t __m128i vl3 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 77 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 78 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 79 …t __m128i vl5 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 81 …t __m128i vl7 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 86 …__m128i vl8 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() [all …]
|
D | sse2-lut64-p2-div-x8.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 58 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 59 …t __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 60 …t __m128i vl1 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 62 …t __m128i vl3 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 67 …__m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 68 …t __m128i vl6 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 69 …t __m128i vl5 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 71 …t __m128i vl7 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 79 …__m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() [all …]
|
D | sse41-lut64-p2-div-x4.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 51 …m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 52 …__m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 53 …i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 54 …i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 56 …__m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 57 …__m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 58 …i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 59 …i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() 93 …m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() [all …]
|
D | neonfma-rr1-lut64-p2-div-x24.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 75 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 76 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 79 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 80 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 83 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 84 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 87 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 88 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 91 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() [all …]
|
D | wasmsimd-lut64-p2-div-x24.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 76 …const float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx0… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() 77 …const float vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() 78 …const float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx23)… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() 79 …const float vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx23… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() 83 …const float vl4 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx4… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() 84 …const float vl5 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx45… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() 85 …const float vl6 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx67)… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() 86 …const float vl7 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx67… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() 90 …const float vl8 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx8… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24() [all …]
|
D | sse2-lut64-p2-div-x4.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 51 …m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 52 …__m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 53 …__m128i vl_lh = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 55 …__m128i vl_hh = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 58 …__m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 59 …__m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 60 …__m128i vl_lh = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 62 …__m128i vl_hh = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 98 …m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() [all …]
|
D | wasmsimd-lut64-p2-div-x20.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 71 …const float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx0… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() 72 …const float vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() 73 …const float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx23)… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() 74 …const float vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx23… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() 78 …const float vl4 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx4… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() 79 …const float vl5 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx45… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() 80 …const float vl6 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx67)… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() 81 …const float vl7 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx67… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() 85 …const float vl8 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx8… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20() [all …]
|
D | neonfma-rr1-lut64-p2-div-x20.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 70 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 71 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 74 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 75 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 78 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 79 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 82 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 83 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 86 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() [all …]
|
D | wasmsimd-lut64-p2-div-x16.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 66 …const float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx0… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 67 …const float vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 68 …const float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx23)… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 69 …const float vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx23… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 73 …const float vl4 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx4… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 74 …const float vl5 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx45… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 75 …const float vl6 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx67)… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 76 …const float vl7 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx67… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() 80 …const float vl8 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx8… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16() [all …]
|
D | neonfma-rr1-lut64-p2-div-x16.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 65 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 66 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 69 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 70 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 73 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 74 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 77 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 78 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 80 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() [all …]
|
D | wasmsimd-lut64-p2-div-x12.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 61 …const float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx0… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 62 …const float vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 63 …const float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx23)… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 64 …const float vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx23… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 68 …const float vl4 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx4… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 69 …const float vl5 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx45… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 70 …const float vl6 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx67)… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 71 …const float vl7 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx67… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 75 …const float vl8 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx8… in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() [all …]
|
D | neonfma-rr1-lut64-p2-div-x12.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 60 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 61 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 64 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 65 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 68 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 69 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 71 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 72 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 74 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() [all …]
|
D | neonfma-rr1-lut64-p2-nr2fma-x24.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 75 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 76 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 79 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 80 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 83 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 84 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 87 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 88 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 91 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() [all …]
|
D | neonfma-rr1-lut64-p2-nr2recps-x24.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 75 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 76 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 79 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 80 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 83 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 84 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 87 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 88 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 91 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() [all …]
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x24.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 75 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 76 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 79 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 80 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 83 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 84 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 87 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 88 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 91 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() [all …]
|
D | neon-rr2-lut64-p2-nr2recps-x24.c | 18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 76 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 77 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 80 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 81 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 84 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 85 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 88 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 89 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 92 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() [all …]
|
/external/XNNPACK/src/math/ |
D | sigmoid-avx-rr2-lut64-p2-div.c | 16 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_64[64]; 84 …__m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 85 …__m128i vl_lh = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 86 …__m128i vl_hl = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 87 …__m128i vl_hh = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 88 …vl_ll = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint3… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 89 …vl_lh = _mm_insert_epi32(vl_lh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint3… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 90 …vl_hl = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint3… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 91 …vl_hh = _mm_insert_epi32(vl_hh, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint3… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() 93 …__m128i vl_ll = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uin… in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() [all …]
|