/external/XNNPACK/src/f32-argmaxpool/
9p8x-wasmsimd-c4.c (xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4):
     76  v128_t vidx = wasm_i32x4_splat(0);
     80  vidx = wasm_v128_bitselect(wasm_i32x4_splat(1), vidx, vm1);
     84  vidx = wasm_v128_bitselect(wasm_i32x4_splat(2), vidx, vm2);
     88  vidx = wasm_v128_bitselect(wasm_i32x4_splat(3), vidx, vm3);
     92  vidx = wasm_v128_bitselect(wasm_i32x4_splat(4), vidx, vm4);
     96  vidx = wasm_v128_bitselect(wasm_i32x4_splat(5), vidx, vm5);
    100  vidx = wasm_v128_bitselect(wasm_i32x4_splat(6), vidx, vm6);
    104  vidx = wasm_v128_bitselect(wasm_i32x4_splat(7), vidx, vm7);
    108  vidx = wasm_v128_bitselect(wasm_i32x4_splat(8), vidx, vm8);
    112  wasm_v128_store(ib, vidx);
    [all …]

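All of these argmax-pooling kernels track the index of the running maximum branch-free: for each pooling element k, a comparison mask vm_k flags the lanes where the new value beats the running maximum, and the same mask blends the constant k into vidx. A minimal sketch of one update step with the wasm_simd128.h intrinsics seen above (the helper name and loop context are assumptions, not XNNPACK API):

    #include <stdint.h>
    #include <wasm_simd128.h>

    // Hypothetical helper: one branch-free argmax update for 4 float lanes.
    // k is the pooling element currently being considered.
    static void argmax_update(v128_t* vmax, v128_t* vidx, const float* in, int32_t k) {
      const v128_t vi = wasm_v128_load(in);         // candidate values
      const v128_t vm = wasm_f32x4_gt(vi, *vmax);   // lanes where the candidate wins
      *vmax = wasm_v128_bitselect(vi, *vmax, vm);   // keep the larger value per lane
      *vidx = wasm_v128_bitselect(wasm_i32x4_splat(k), *vidx, vm);  // record its index
    }
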
9p8x-neon-c4.c (xnn_f32_argmaxpool_ukernel_9p8x__neon_c4):
     67  uint32x4_t vidx = vmovq_n_u32(0);
     71  vidx = vbslq_u32(vm1, vmovq_n_u32(1), vidx);
     75  vidx = vbslq_u32(vm2, vmovq_n_u32(2), vidx);
     79  vidx = vbslq_u32(vm3, vmovq_n_u32(3), vidx);
     83  vidx = vbslq_u32(vm4, vmovq_n_u32(4), vidx);
     87  vidx = vbslq_u32(vm5, vmovq_n_u32(5), vidx);
     91  vidx = vbslq_u32(vm6, vmovq_n_u32(6), vidx);
     95  vidx = vbslq_u32(vm7, vmovq_n_u32(7), vidx);
     99  vidx = vbslq_u32(vm8, vmovq_n_u32(8), vidx);
    102  vst1q_u32(ib, vidx); ib += 4;
    [all …]

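NEON spells the same select as vbslq_u32, which takes the mask as its first operand and picks bits from the second operand where the mask is set. A one-line helper sketch (hypothetical name, real intrinsics):

    #include <arm_neon.h>

    // Hypothetical helper: blend index k into vidx on the lanes flagged by vm.
    // vbslq_u32(mask, a, b) selects bits of a where mask is 1, else b.
    static uint32x4_t blend_index_neon(uint32x4_t vidx, uint32x4_t vm, uint32_t k) {
      return vbslq_u32(vm, vmovq_n_u32(k), vidx);
    }
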
9x-wasmsimd-c4.c (xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4):
     96  v128_t vidx = wasm_i32x4_splat(0);
    100  vidx = wasm_v128_bitselect(wasm_i32x4_splat(1), vidx, vm1);
    104  vidx = wasm_v128_bitselect(wasm_i32x4_splat(2), vidx, vm2);
    108  vidx = wasm_v128_bitselect(wasm_i32x4_splat(3), vidx, vm3);
    112  vidx = wasm_v128_bitselect(wasm_i32x4_splat(4), vidx, vm4);
    116  vidx = wasm_v128_bitselect(wasm_i32x4_splat(5), vidx, vm5);
    120  vidx = wasm_v128_bitselect(wasm_i32x4_splat(6), vidx, vm6);
    124  vidx = wasm_v128_bitselect(wasm_i32x4_splat(7), vidx, vm7);
    128  vidx = wasm_v128_bitselect(wasm_i32x4_splat(8), vidx, vm8);
    132  wasm_v128_store(index, vidx);
    [all …]

9p8x-sse2-c4.c (xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4):
     76  __m128i vidx = _mm_setzero_si128();
     80  vidx = _mm_or_si128(_mm_andnot_si128(vm1, vidx), _mm_and_si128(vm1, _mm_set1_epi32(1)));
     84  vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, _mm_set1_epi32(2)));
     88  vidx = _mm_or_si128(_mm_andnot_si128(vm3, vidx), _mm_and_si128(vm3, _mm_set1_epi32(3)));
     92  vidx = _mm_or_si128(_mm_andnot_si128(vm4, vidx), _mm_and_si128(vm4, _mm_set1_epi32(4)));
     96  vidx = _mm_or_si128(_mm_andnot_si128(vm5, vidx), _mm_and_si128(vm5, _mm_set1_epi32(5)));
    100  vidx = _mm_or_si128(_mm_andnot_si128(vm6, vidx), _mm_and_si128(vm6, _mm_set1_epi32(6)));
    104  vidx = _mm_or_si128(_mm_andnot_si128(vm7, vidx), _mm_and_si128(vm7, _mm_set1_epi32(7)));
    108  vidx = _mm_or_si128(_mm_andnot_si128(vm8, vidx), _mm_and_si128(vm8, _mm_set1_epi32(8)));
    112  _mm_store_si128((__m128i*) ib, vidx);
    [all …]

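SSE2 has no blend instruction, so the same select is synthesized from three bitwise ops: (vidx & ~vm) | (k & vm). The equivalent helper might look like this (hypothetical name, real SSE2 intrinsics):

    #include <emmintrin.h>

    // Hypothetical helper: blend the constant index k into vidx wherever the
    // comparison mask vm (all-ones per winning lane) is set, SSE2-style.
    static __m128i blend_index_sse2(__m128i vidx, __m128i vm, int k) {
      return _mm_or_si128(_mm_andnot_si128(vm, vidx),            // keep old index where vm == 0
                          _mm_and_si128(vm, _mm_set1_epi32(k))); // take k where vm == 1
    }
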
9x-neon-c4.c (xnn_f32_argmaxpool_ukernel_9x__neon_c4):
     86  uint32x4_t vidx = vmovq_n_u32(0);
     90  vidx = vbslq_u32(vm1, vmovq_n_u32(1), vidx);
     94  vidx = vbslq_u32(vm2, vmovq_n_u32(2), vidx);
     98  vidx = vbslq_u32(vm3, vmovq_n_u32(3), vidx);
    102  vidx = vbslq_u32(vm4, vmovq_n_u32(4), vidx);
    106  vidx = vbslq_u32(vm5, vmovq_n_u32(5), vidx);
    110  vidx = vbslq_u32(vm6, vmovq_n_u32(6), vidx);
    114  vidx = vbslq_u32(vm7, vmovq_n_u32(7), vidx);
    118  vidx = vbslq_u32(vm8, vmovq_n_u32(8), vidx);
    121  vst1q_u32(index, vidx); index += 4;
    [all …]

9x-sse2-c4.c (xnn_f32_argmaxpool_ukernel_9x__sse2_c4):
     95  __m128i vidx = _mm_setzero_si128();
     99  vidx = _mm_or_si128(_mm_andnot_si128(vm1, vidx), _mm_and_si128(vm1, _mm_set1_epi32(1)));
    103  vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, _mm_set1_epi32(2)));
    107  vidx = _mm_or_si128(_mm_andnot_si128(vm3, vidx), _mm_and_si128(vm3, _mm_set1_epi32(3)));
    111  vidx = _mm_or_si128(_mm_andnot_si128(vm4, vidx), _mm_and_si128(vm4, _mm_set1_epi32(4)));
    115  vidx = _mm_or_si128(_mm_andnot_si128(vm5, vidx), _mm_and_si128(vm5, _mm_set1_epi32(5)));
    119  vidx = _mm_or_si128(_mm_andnot_si128(vm6, vidx), _mm_and_si128(vm6, _mm_set1_epi32(6)));
    123  vidx = _mm_or_si128(_mm_andnot_si128(vm7, vidx), _mm_and_si128(vm7, _mm_set1_epi32(7)));
    127  vidx = _mm_or_si128(_mm_andnot_si128(vm8, vidx), _mm_and_si128(vm8, _mm_set1_epi32(8)));
    131  _mm_storeu_si128((__m128i*) index, vidx);
    [all …]

9p8x-scalar-c1.c (xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1):
     67  uint32_t vidx = 0;
     71  vidx = 1;
     76  vidx = 2;
     81  vidx = 3;
     86  vidx = 4;
     91  vidx = 5;
     96  vidx = 6;
    101  vidx = 7;
    106  vidx = 8;
    110  *ib++ = vidx;
    [all …]

4x-wasmsimd-c4.c (xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4):
     61  v128_t vidx = wasm_i32x4_splat(0);
     65  vidx = wasm_v128_bitselect(wasm_i32x4_splat(1), vidx, vm1);
     69  vidx = wasm_v128_bitselect(wasm_i32x4_splat(2), vidx, vm2);
     73  vidx = wasm_v128_bitselect(wasm_i32x4_splat(3), vidx, vm3);
     77  wasm_v128_store(index, vidx);
     87  v128_t vidx = wasm_i32x4_splat(0);
     91  vidx = wasm_v128_bitselect(wasm_i32x4_splat(1), vidx, vm1);
     95  vidx = wasm_v128_bitselect(wasm_i32x4_splat(2), vidx, vm2);
     99  vidx = wasm_v128_bitselect(wasm_i32x4_splat(3), vidx, vm3);
    103  *((double*) index) = wasm_f64x2_extract_lane(vidx, 0);
    [all …]

4x-sse2-c4.c (xnn_f32_argmaxpool_ukernel_4x__sse2_c4):
     60  __m128i vidx = _mm_setzero_si128();
     64  vidx = _mm_or_si128(_mm_andnot_si128(vm1, vidx), _mm_and_si128(vm1, _mm_set1_epi32(1)));
     68  vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, _mm_set1_epi32(2)));
     72  vidx = _mm_or_si128(_mm_andnot_si128(vm3, vidx), _mm_and_si128(vm3, _mm_set1_epi32(3)));
     76  _mm_storeu_si128((__m128i*) index, vidx);
     86  __m128i vidx = _mm_setzero_si128();
     90  vidx = _mm_or_si128(_mm_andnot_si128(vm1, vidx), _mm_and_si128(vm1, _mm_set1_epi32(1)));
     94  vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, _mm_set1_epi32(2)));
     98  vidx = _mm_or_si128(_mm_andnot_si128(vm3, vidx), _mm_and_si128(vm3, _mm_set1_epi32(3)));
    102  _mm_storel_epi64((__m128i*) index, vidx);
    [all …]

4x-neon-c4.c (xnn_f32_argmaxpool_ukernel_4x__neon_c4):
     56  uint32x4_t vidx = vmovq_n_u32(0);
     60  vidx = vbslq_u32(vm1, vmovq_n_u32(1), vidx);
     64  vidx = vbslq_u32(vm2, vmovq_n_u32(2), vidx);
     68  vidx = vbslq_u32(vm3, vmovq_n_u32(3), vidx);
     71  vst1q_u32(index, vidx); index += 4;
     80  uint32x4_t vidx = vmovq_n_u32(0);
     84  vidx = vbslq_u32(vm1, vmovq_n_u32(1), vidx);
     88  vidx = vbslq_u32(vm2, vmovq_n_u32(2), vidx);
     92  vidx = vbslq_u32(vm3, vmovq_n_u32(3), vidx);
     95  uint32x2_t vidx_lo = vget_low_u32(vidx);
    [all …]

9x-scalar-c1.c (xnn_f32_argmaxpool_ukernel_9x__scalar_c1):
     85  uint32_t vidx = 0;
     89  vidx = 1;
     94  vidx = 2;
     99  vidx = 3;
    104  vidx = 4;
    109  vidx = 5;
    114  vidx = 6;
    119  vidx = 7;
    124  vidx = 8;
    128  *index++ = vidx;

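The scalar kernels spell out the invariant the vector versions emulate: vidx is overwritten with k exactly when pooling element k strictly exceeds the running maximum, so ties keep the earliest index. A self-contained scalar reference (hypothetical function, not the XNNPACK microkernel signature):

    #include <stddef.h>
    #include <stdint.h>

    // Hypothetical scalar reference: argmax over n candidate values.
    static uint32_t argmax_index(const float* v, size_t n, float* out_max) {
      float vmax = v[0];
      uint32_t vidx = 0;
      for (size_t k = 1; k < n; k++) {
        if (v[k] > vmax) {  // strict >, so ties keep the earliest index
          vmax = v[k];
          vidx = (uint32_t) k;
        }
      }
      *out_max = vmax;
      return vidx;
    }
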
/external/XNNPACK/src/f32-velu/ |
sse-rr2-lut16-p3.c.in:
     60  …const __m128i vidx${ABC[N:N+4]} = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn${ABC[N:N+4]}), …
     65  const uint64_t vidx${ABC[N:N+2]} = (uint64_t) _mm_cvtsi128_si64(vidx${ABC[N:N+4]});
     67  const uint64_t vidx${ABC[N+2:N+4]} = (uint64_t) _mm_extract_epi64(vidx${ABC[N:N+4]}, 1);
     69  …const uint64_t vidx${ABC[N+2:N+4]} = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx${ABC[N:N…
     70  …_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx${ABC[N:N+2]})));
     71  …i128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx${ABC[N+2:N+4]})));
     73  …, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx${ABC[N:N+2]} >> 32)…
     75  …8(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx${ABC[N:N+2]} >> 32)…
     78  …, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx${ABC[N+2:N+4]} >> 3…
     80  …8(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx${ABC[N+2:N+4]} >> 3…
    [all …]

avx-rr2-lut16-p3.c.in:
     58  const __m256 vidx${N} = _mm256_and_ps(vn${N}, vindex_mask);
     60  … const __m128i vidx${N}_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx${N})), 2);
     61  …const __m128i vidx${N}_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx${N}, 1)), 2…
     63  const uint64_t vidx${N}_ll = (uint64_t) _mm_cvtsi128_si64(vidx${N}_lo);
     64  const uint64_t vidx${N}_lh = (uint64_t) _mm_extract_epi64(vidx${N}_lo, 1);
     65  const uint64_t vidx${N}_hl = (uint64_t) _mm_cvtsi128_si64(vidx${N}_hi);
     66  const uint64_t vidx${N}_hh = (uint64_t) _mm_extract_epi64(vidx${N}_hi, 1);
     67  …_mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx${N}_ll));
     68  …_mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx${N}_lh));
     69  …_mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx${N}_hl));
    [all …]

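The AVX template masks the index bits with one 256-bit float AND, but AVX1 has no 256-bit integer shift, so the vector is split into two SSE halves before the lanes are extracted. A sketch of that split under the same names the template uses (helper name is hypothetical; intrinsics are real):

    #include <immintrin.h>

    // Hypothetical helper: keep the LUT index bits of each lane of vn (already
    // masked as float bit patterns) and scale them to byte offsets (<< 2
    // multiplies by sizeof(float)), split into two SSE halves because AVX1
    // lacks 256-bit integer shifts.
    static void split_index(__m256 vn, __m256 vindex_mask, __m128i* lo, __m128i* hi) {
      const __m256 vidx = _mm256_and_ps(vn, vindex_mask);
      *lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2);
      *hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2);
    }
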
/external/XNNPACK/src/f32-sigmoid/ |
sse-lut64-p2-div.c.in:
     56  …const __m128i vidx${ABC[N:N+4]} = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn${ABC[N:N+4]}), …
     60  const uint64_t vidx${ABC[N:N+2]} = (uint64_t) _mm_cvtsi128_si64(vidx${ABC[N:N+4]});
     62  const uint64_t vidx${ABC[N+2:N+4]} = (uint64_t) _mm_extract_epi64(vidx${ABC[N:N+4]}, 1);
     64  …const uint64_t vidx${ABC[N+2:N+4]} = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx${ABC[N:N…
     65  …_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx${ABC[N:N+2]})));
     66  …i128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx${ABC[N+2:N+4]})));
     68  …, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx${ABC[N:N+2]} >> 32)…
     70  …8(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx${ABC[N:N+2]} >> 32)…
     73  …, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx${ABC[N+2:N+4]} >> 3…
     75  …8(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx${ABC[N+2:N+4]} >> 3…
    [all …]

/external/XNNPACK/src/f32-sigmoid/gen/ |
sse2-lut64-p2-div-x4.c (xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4):
     47  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     49  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     50  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
     58  …(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_cvtsi128_si32(vidx))));
     59  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));
     60  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2))));
     62  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6))));
     94  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     96  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     97  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
    [all …]

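This sse2 kernel and the sse41 variant below differ only in how the table indices leave the vector register: SSE2 has no 32- or 64-bit lane extract, so the upper half is moved down with _mm_unpackhi_epi64 and individual lanes are read with the 16-bit extract, which works here because the shifted index fits in the low 16 bits of its 32-bit lane. Both paths in one sketch (hypothetical helper, real intrinsics):

    #include <stdint.h>
    #include <emmintrin.h>   // SSE2
    #ifdef __SSE4_1__
    #include <smmintrin.h>   // SSE4.1: _mm_extract_epi64
    #endif

    // Hypothetical helper: extract the two 64-bit halves of vidx.
    static void extract_index_halves(__m128i vidx, uint64_t* lo, uint64_t* hi) {
      *lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    #ifdef __SSE4_1__
      *hi = (uint64_t) _mm_extract_epi64(vidx, 1);  // direct lane extract
    #else
      *hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));  // SSE2 fallback
    #endif
    }
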
sse41-lut64-p2-div-x4.c (xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4):
     47  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     49  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     50  const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1);
     56  …(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_cvtsi128_si32(vidx))));
     57  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 4))));
     58  …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 2))), 1);
     59  …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi16(vidx, 6))), 1);
     89  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     91  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     92  const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1);
    [all …]

/external/XNNPACK/src/f32-velu/gen/ |
velu-sse2-rr2-lut16-p3-x4.c (xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4):
     54  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     56  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     57  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
     65  …(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx))));
     66  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4))));
     67  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 2))));
     69  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 6))));
    102  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
    104  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    105  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
    [all …]

velu-sse41-rr2-lut16-p3-x4.c (xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4):
     54  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     56  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     57  const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1);
     63  …(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx))));
     64  …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 4))));
     65  …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 2))), 1);
     66  …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi16(vidx, 6))), 1);
     97  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     99  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    100  const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1);
    [all …]

/external/icu/icu4j/main/classes/core/src/com/ibm/icu/impl/ |
ICUDebug.java (getInstanceLenient):
     40  int i = 0, vidx = 0;
     45  if (vidx == 3) {
     50  vidx++;
     54  ver[vidx] = ver[vidx] * 10 + (c - '0');
     55  if (ver[vidx] > 255) {
     59  ver[vidx] = 0;
     64  ver[vidx] = c - '0';

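getInstanceLenient scans an ICU version string character by character: vidx selects which of the four dotted components is being accumulated, each digit folds into ver[vidx] in base 10, and a component above 255 is out of range. A simplified standalone C sketch of that state machine (the original is Java and more lenient about trailing input; the rejection behavior below is an assumption):

    // Hypothetical C port of the lenient version parse: fills ver[0..3]
    // from a string like "4.8.1.1"; returns 0 on malformed input.
    static int parse_version_lenient(const char* s, int ver[4]) {
      int vidx = 0;
      ver[0] = ver[1] = ver[2] = ver[3] = 0;
      for (; *s != '\0'; s++) {
        const char c = *s;
        if (c == '.') {
          if (vidx == 3) return 0;        // more than four components
          vidx++;
        } else if (c >= '0' && c <= '9') {
          ver[vidx] = ver[vidx] * 10 + (c - '0');
          if (ver[vidx] > 255) return 0;  // each component must fit in a byte
        } else {
          return 0;                       // unexpected character
        }
      }
      return 1;
    }
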
/external/icu/android_icu4j/src/main/java/android/icu/impl/ |
ICUDebug.java (getInstanceLenient):
     44  int i = 0, vidx = 0;
     49  if (vidx == 3) {
     54  vidx++;
     58  ver[vidx] = ver[vidx] * 10 + (c - '0');
     59  if (ver[vidx] > 255) {
     63  ver[vidx] = 0;
     68  ver[vidx] = c - '0';

/external/XNNPACK/src/math/ |
expm1minus-sse2-rr2-lut16-p3.c (xnn_math_f32_expm1minus__sse2_rr2_lut16_p3):
     71  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     73  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     74  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
     80  const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx);
     81  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx, 2);
     82  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);
     83  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx, 6);

sigmoid-sse2-rr2-lut64-p2-div.c (xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_div):
     76  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     78  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     79  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
     85  const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx);
     86  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx, 2);
     87  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);
     88  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx, 6);

sigmoid-sse2-rr2-lut64-p2-nr2.c (xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_nr2):
     77  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     79  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     80  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
     86  const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx);
     87  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx, 2);
     88  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);
     89  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx, 6);

exp-sse2-rr2-lut64-p2.c (xnn_math_f32_exp__sse2_rr2_lut64_p2):
     69  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     71  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx);
     72  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
     78  const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx);
     79  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx, 2);
     80  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);
     81  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx, 6);

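All of these rr2-lut64 kernels recover the table index the same way: vn holds n plus a large magic bias, so round(x * 64/ln 2) sits in the low mantissa bits of vn's bit pattern; the mask keeps the low 6 bits (the table has 64 entries) and the shift left by 2 turns an entry index into a byte offset into a table of 4-byte values. A scalar sketch of that index math (the 0x3F mask value is my reading of vindex_mask, so treat it as an assumption):

    #include <stdint.h>
    #include <string.h>

    // Hypothetical scalar twin of the vectorized index computation above.
    static uint32_t lut64_byte_offset(float vn) {
      uint32_t bits;
      memcpy(&bits, &vn, sizeof bits);  // same reinterpretation as _mm_castps_si128
      return (bits & 0x3F) << 2;        // low 6 bits of n, scaled by sizeof(float)
    }
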
sigmoid-sse2-rr2-lut64-p2-nr1.c (xnn_math_f32_sigmoid__sse2_rr2_lut64_p2_nr1):
     77  const __m128i vidx = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn), vindex_mask), 2);
     79  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     80  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
     86  const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx);
     87  const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx, 2);
     88  const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx, 4);
     89  const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx, 6);