/external/XNNPACK/src/f32-argmaxpool/ |
D | 9p8x-wasmsimd-c4.c |
    173  const v128_t vidx2 = wasm_i32x4_add(vidx1, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local
    175  vidx = wasm_v128_bitselect(vidx2, vidx, vm2);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    178  const v128_t vidx3 = wasm_i32x4_add(vidx2, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    288  const v128_t vidx2 = wasm_i32x4_add(vidx1, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local
    290  vidx = wasm_v128_bitselect(vidx2, vidx, vm2);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    293  const v128_t vidx3 = wasm_i32x4_add(vidx2, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    345  const v128_t vidx2 = wasm_i32x4_add(vidx1, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local
    347  vidx = wasm_v128_bitselect(vidx2, vidx, vm2);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    350  const v128_t vidx3 = wasm_i32x4_add(vidx2, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
|
D | 9p8x-neon-c4.c |
    154  const uint32x4_t vidx2 = vaddq_u32(vidx1, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local
    156  vidx = vbslq_u32(vm2, vidx2, vidx);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    159  const uint32x4_t vidx3 = vaddq_u32(vidx2, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    257  const uint32x4_t vidx2 = vaddq_u32(vidx1, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local
    259  vidx = vbslq_u32(vm2, vidx2, vidx);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    262  const uint32x4_t vidx3 = vaddq_u32(vidx2, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    312  const uint32x4_t vidx2 = vaddq_u32(vidx1, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local
    314  vidx = vbslq_u32(vm2, vidx2, vidx);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    317  const uint32x4_t vidx3 = vaddq_u32(vidx2, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
|
D | 9p8x-sse2-c4.c |
    173  const __m128i vidx2 = _mm_add_epi32(vidx1, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
    175  vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, vidx2));  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    178  const __m128i vidx3 = _mm_add_epi32(vidx2, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    288  const __m128i vidx2 = _mm_add_epi32(vidx1, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
    290  vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, vidx2));  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    293  const __m128i vidx3 = _mm_add_epi32(vidx2, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    345  const __m128i vidx2 = _mm_add_epi32(vidx1, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
    347  vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, vidx2));  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    350  const __m128i vidx3 = _mm_add_epi32(vidx2, v1);  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
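Note: all three argmaxpool variants above implement the same per-lane running-argmax update. vidx2 is the candidate index for pooling input 2 (vidx1 plus a vector of ones), and it replaces the running index vidx only in lanes where the comparison mask vm2 is set. NEON (vbslq_u32) and WASM SIMD (wasm_v128_bitselect) have a bit-select instruction for this; SSE2 does not, so the select is spelled out with andnot/and/or. Below is a minimal sketch of that update step under SSE2; the helper name and surrounding structure are illustrative, not taken from the kernel:

    #include <emmintrin.h>

    /* One argmax update: compare a new row of values against the running
     * maximum and pull in the new index wherever the new value wins. */
    static inline void argmax_update_sse2(__m128 vnew, __m128i vidx_new,
                                          __m128* vmax, __m128i* vidx) {
      /* All-ones lanes where vnew > *vmax. */
      const __m128i vm = _mm_castps_si128(_mm_cmpgt_ps(vnew, *vmax));
      *vmax = _mm_max_ps(vnew, *vmax);
      /* Select: *vidx = vm ? vidx_new : *vidx (SSE2 has no blend). */
      *vidx = _mm_or_si128(_mm_andnot_si128(vm, *vidx), _mm_and_si128(vm, vidx_new));
    }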
|
/external/XNNPACK/src/math/ |
D | expm1minus-f32-sse2-rr2-lut16-p3.c |
    82  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);  in xnn_math_f32_expm1minus__sse2_rr2_lut16_p3() local
    85  …m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + vidx2)));  in xnn_math_f32_expm1minus__sse2_rr2_lut16_p3()
|
D | sigmoid-f32-sse2-rr2-lut64-p2-div.c |
    87  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);  in xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_div() local
    90  …m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx2)));  in xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_div()
|
D | sigmoid-f32-sse2-rr2-lut64-p2-nr1.c |
    88  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);  in xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_nr1() local
    91  …m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx2)));  in xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_nr1()
|
D | exp-f32-sse2-rr2-lut64-p2.c |
    80  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);  in xnn_math_f32_exp__sse2_rr2_lut64_p2() local
    83  …st __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + vidx2)));  in xnn_math_f32_exp__sse2_rr2_lut64_p2()
|
D | sigmoid-f32-sse2-rr2-lut64-p2-nr2.c |
    88  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);  in xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_nr2() local
    91  …m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx2)));  in xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_nr2()
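Note: these SSE2 reference routines all work around the same limitation: SSE2 has no gather, so the vector vidx of per-lane table offsets is taken apart with _mm_extract_epi16 (16-bit lane 4 is the low half of 32-bit lane 2, hence the name vidx2) and each table entry is fetched with a scalar load and re-packed through _mm_cvtsi32_si128. Below is a sketch of that extract-and-load step, assuming vidx already holds small per-lane byte offsets into the float table; table64 is a placeholder name, not the XNNPACK symbol:

    #include <emmintrin.h>
    #include <stdint.h>

    extern const float table64[64];  /* placeholder for an exp2 lookup table */

    static inline __m128i load_lane2_sse2(__m128i vidx) {
      /* Low 16 bits of 32-bit lane 2; enough because the byte offsets stay below 256. */
      const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);
      /* Scalar table load, placed in the low lane of a fresh vector. */
      return _mm_cvtsi32_si128(*((const int*) ((uintptr_t) table64 + vidx2)));
    }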
|
/external/XNNPACK/src/f32-vsigmoid/gen/ |
D | vsigmoid-scalar-rr2-lut2048-p1-div-x4.c |
    64  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4() local
    65  const float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_2048[vidx2] + ve2);  in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4()
|
D | vsigmoid-scalar-rr2-lut64-p2-div-x4.c |
    64  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4() local
    65  const float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_64[vidx2] + ve2);  in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4()
|
D | vsigmoid-sse41-rr2-lut64-p2-div-x8.c |
    73  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4);  in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8() local
    76  …m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx2)));  in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8()
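Note: in the scalar kernels of this group (and in the scalar raddstoreexpminusmax and velu groups below), vidx2 is a table index cut from the low mantissa bits of vn2, the input scaled and rounded into fixed point via a magic bias, and vs2 ≈ 2^n2 is rebuilt by integer-adding the leftover exponent bits ve2 onto the table entry's bit pattern. The sketch below shows the same index/exponent split on a plain fixed-point integer, leaving out the magic-bias rounding used by the real kernels; the table name is a stand-in and is assumed to hold the bit patterns of 2^(k/64) for k = 0..63:

    #include <stdint.h>
    #include <string.h>

    extern const uint32_t table_exp2_k_over_64[64];  /* placeholder: bits of 2^(k/64), k = 0..63 */

    static inline float uint32_as_float(uint32_t u) {
      float f; memcpy(&f, &u, sizeof f); return f;
    }

    /* Compute 2^(n64/64) for moderate n64 (the result must stay a normal float). */
    static inline float exp2_n64(int32_t n64) {
      const uint32_t bits = (uint32_t) n64;
      const uint32_t vidx = bits & UINT32_C(0x3F);          /* fractional part -> table index */
      const uint32_t ve = (bits & ~UINT32_C(0x3F)) << 17;   /* integer part -> exponent field (bit 23 = 6 + 17) */
      return uint32_as_float(table_exp2_k_over_64[vidx] + ve);
    }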
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | scalar-rr2-lut64-p2-x4-acc4.c |
    87  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x4_acc4() local
    92  const float vs2 = uint32_as_float(xnn_table_exp2_k_over_64[vidx2] + ve2);  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x4_acc4()
|
D | scalar-rr2-lut64-p2-x4-acc2.c |
    85  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x4_acc2() local
    90  const float vs2 = uint32_as_float(xnn_table_exp2_k_over_64[vidx2] + ve2);  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x4_acc2()
|
D | scalar-rr2-lut64-p2-x4.c |
    84  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x4() local
    89  const float vs2 = uint32_as_float(xnn_table_exp2_k_over_64[vidx2] + ve2);  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x4()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-scalar-rr2-lut16-p3-x3.c |
    62  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3() local
    70  float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx2] + ven2);  in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
|
D | velu-wasm-rr2-lut16-p3-x3.c |
    62  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3() local
    70  float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx2] + ven2);  in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
|
D | velu-wasm-rr2-lut16-p3-x4.c |
    65  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() local
    76  float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx2] + ven2);  in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
|
D | velu-scalar-rr2-lut16-p3-x4.c |
    65  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() local
    76  float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx2] + ven2);  in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
|
D | velu-wasm-rr2-lut16-p3-x5.c |
    68  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() local
    82  float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx2] + ven2);  in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
|
D | velu-scalar-rr2-lut16-p3-x5.c |
    68  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() local
    82  float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx2] + ven2);  in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
|
D | velu-wasm-rr2-lut16-p3-x6.c |
    71  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() local
    88  float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx2] + ven2);  in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
|
D | velu-scalar-rr2-lut16-p3-x6.c |
    71  const uint32_t vidx2 = float_as_uint32(vn2) & vindex_mask;  in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local
    88  float vs2 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx2] + ven2);  in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
|
D | velu-avx2-rr1-lut16-p3-gather-x24.c |
    57  const __m256i vidx2 = _mm256_and_si256(_mm256_castps_si256(vn2), vindex_mask);  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24() local
    58  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
|
D | velu-avx2-rr1-lut16-p3-gather-x32.c |
    60  const __m256i vidx2 = _mm256_and_si256(_mm256_castps_si256(vn2), vindex_mask);  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32() local
    61  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
|
D | velu-avx2-rr1-lut16-p3-gather-x40.c |
    63  const __m256i vidx2 = _mm256_and_si256(_mm256_castps_si256(vn2), vindex_mask);  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40() local
    64  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
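Note: the AVX2 ELU kernels above avoid the SSE2 lane-extraction dance entirely: vidx2 is masked out of the float bits of vn2 with _mm256_and_si256 and handed straight to a hardware gather. A minimal sketch of that lookup; table16 is a placeholder for a 16-entry exp2 table, not the XNNPACK symbol:

    #include <immintrin.h>

    extern const int table16[16];  /* placeholder: bit patterns of the 16 exp2 table entries */

    static inline __m256i gather_lut16_avx2(__m256 vn) {
      const __m256i vindex_mask = _mm256_set1_epi32(0xF);
      /* Low 4 bits of each rounded value select the table entry per lane. */
      const __m256i vidx = _mm256_and_si256(_mm256_castps_si256(vn), vindex_mask);
      /* One gather replaces eight scalar loads; scale = element size in bytes. */
      return _mm256_i32gather_epi32(table16, vidx, sizeof(int));
    }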
|