/external/XNNPACK/src/f32-sigmoid/ |
D | sse-lut64-p2-div.c.in | 163 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); variable 176 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); variable 228 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); variable 241 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); variable
|
/external/XNNPACK/src/f32-velu/ |
D | sse-rr2-lut16-p3.c.in | 168 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); variable 181 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); variable 235 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); variable 248 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); variable
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-sse41-rr2-lut16-p3-x4.c | 60 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 65 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 103 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 108 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local
|
D | velu-sse2-rr2-lut16-p3-x4.c | 61 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 68 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 109 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 116 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local
|
D | velu-sse41-rr2-lut16-p3-x8.c | 150 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 155 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 193 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 198 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local
|
D | velu-neonfma-rr1-lut16-p3-x4.c | 55 …int32x2_t vl_lo = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local 91 …int32x2_t vl_lo = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local
|
D | velu-sse2-rr2-lut16-p3-x8.c | 161 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 168 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 209 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 216 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local
|
D | velu-neon-rr2-lut16-p3-x4.c | 56 …int32x2_t vl_lo = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local 93 …int32x2_t vl_lo = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-lut64-p2-div-x4.c | 53 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 58 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 95 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 100 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local
|
D | sse2-lut64-p2-div-x4.c | 54 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 61 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 101 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 108 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local
|
D | sse41-lut64-p2-div-x8.c | 140 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 145 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 182 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 187 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local
|
D | neonfma-rr1-lut64-p2-div-x4.c | 47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local 81 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
|
D | neonfma-rr1-lut2048-p1-div-x4.c | 47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local 80 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
|
D | neonfma-rr1-lut64-p2-nr2fma-x4.c | 47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local 85 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x4.c | 47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local 84 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
|
D | neonfma-rr1-lut2048-p1-nr2recps-x4.c | 47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local 84 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
|
D | neonfma-rr1-lut2048-p1-nr2fma-x4.c | 47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local 84 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
|
D | neonfma-rr1-lut64-p2-nr2recps-x4.c | 47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local 85 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x4.c | 47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local 85 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
|
D | sse2-lut64-p2-div-x8.c | 152 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 159 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 199 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 206 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local
|
D | sse41-lut64-p2-div-x12.c | 172 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 177 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 214 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 219 …const __m128i vl_lo = _mm_insert_epi32(vl_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local
|
D | neon-rr2-lut2048-p1-nr2recps-x4.c | 48 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local 86 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
|
D | neon-rr2-lut64-p2-nr2recps-x4.c | 48 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local 87 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-lut64-p2-x4.c | 75 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local 151 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local
|
D | neon-lut64-p2-x4.c | 76 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local 152 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local
|