/external/XNNPACK/src/f32-sigmoid/ |
D | sse-lut64-p2-div.c.in | 168 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); variable 181 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); variable 233 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); variable 246 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); variable
|
/external/XNNPACK/src/f32-velu/ |
D | sse-rr2-lut16-p3.c.in | 173 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); variable 186 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); variable 240 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); variable 253 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); variable
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-sse41-rr2-lut16-p3-x4.c | 61 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 66 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 104 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 109 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local
|
D | velu-sse2-rr2-lut16-p3-x4.c | 63 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 70 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 111 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 118 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local
|
D | velu-sse41-rr2-lut16-p3-x8.c | 151 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 156 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 194 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 199 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local
|
D | velu-neonfma-rr1-lut16-p3-x4.c | 56 …int32x2_t vl_hi = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local 92 …int32x2_t vl_hi = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local
|
D | velu-sse2-rr2-lut16-p3-x8.c | 163 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 170 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 211 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 218 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local
|
D | velu-neon-rr2-lut16-p3-x4.c | 57 …int32x2_t vl_hi = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local 94 …int32x2_t vl_hi = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-lut64-p2-div-x4.c | 54 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 59 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 96 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local 101 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local
|
D | sse2-lut64-p2-div-x4.c | 56 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 63 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 103 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local 110 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local
|
D | sse41-lut64-p2-div-x8.c | 141 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 146 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 183 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local 188 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() local
|
D | neonfma-rr1-lut64-p2-div-x4.c | 48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local 82 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
|
D | neonfma-rr1-lut2048-p1-div-x4.c | 48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local 81 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
|
D | neonfma-rr1-lut64-p2-nr2fma-x4.c | 48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local 86 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x4.c | 48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local 85 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
|
D | neonfma-rr1-lut2048-p1-nr2recps-x4.c | 48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local 85 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
|
D | neonfma-rr1-lut2048-p1-nr2fma-x4.c | 48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local 85 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
|
D | neonfma-rr1-lut64-p2-nr2recps-x4.c | 48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local 86 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x4.c | 48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local 86 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
|
D | sse2-lut64-p2-div-x8.c | 154 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 161 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 201 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local 208 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() local
|
D | sse41-lut64-p2-div-x12.c | 173 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 178 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 215 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local 220 …const __m128i vl_hi = _mm_insert_epi32(vl_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_ov… in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() local
|
D | neon-rr2-lut2048-p1-nr2recps-x4.c | 49 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local 87 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
|
D | neon-rr2-lut64-p2-nr2recps-x4.c | 49 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local 88 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-lut64-p2-x4.c | 76 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local 152 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local
|
D | neon-lut64-p2-x4.c | 77 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local 153 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local
|