
Searched refs:vout (Results 1 – 25 of 306) sorted by relevance


/external/XNNPACK/src/u8-clamp/
neon-x64.c
44  uint8x8_t vout = vld1_u8(x); x += 8; in xnn_u8_clamp_ukernel__neon_x64() local
45 vout = vmin_u8(vout, vget_low_u8(voutput_max)); in xnn_u8_clamp_ukernel__neon_x64()
46 vout = vmax_u8(vout, vget_low_u8(voutput_min)); in xnn_u8_clamp_ukernel__neon_x64()
47 vst1_u8(y, vout); y += 8; in xnn_u8_clamp_ukernel__neon_x64()
50 uint8x8_t vout = vld1_u8(x); in xnn_u8_clamp_ukernel__neon_x64() local
51 vout = vmin_u8(vout, vget_low_u8(voutput_max)); in xnn_u8_clamp_ukernel__neon_x64()
52 vout = vmax_u8(vout, vget_low_u8(voutput_min)); in xnn_u8_clamp_ukernel__neon_x64()
55 vst1_lane_u32(__builtin_assume_aligned(y, 1), vreinterpret_u32_u8(vout), 0); y += 4; in xnn_u8_clamp_ukernel__neon_x64()
56 vout = vext_u8(vout, vout, 4); in xnn_u8_clamp_ukernel__neon_x64()
59 vst1_lane_u16(__builtin_assume_aligned(y, 1), vreinterpret_u16_u8(vout), 0); y += 2; in xnn_u8_clamp_ukernel__neon_x64()
[all …]
sse2-x64.c
45  __m128i vout = _mm_loadl_epi64((const __m128i*) x); in xnn_u8_clamp_ukernel__sse2_x64() local
47 vout = _mm_min_epu8(vout, voutput_max); in xnn_u8_clamp_ukernel__sse2_x64()
48 vout = _mm_max_epu8(vout, voutput_min); in xnn_u8_clamp_ukernel__sse2_x64()
49 _mm_storel_epi64((__m128i*) y, vout); in xnn_u8_clamp_ukernel__sse2_x64()
53 __m128i vout = _mm_loadl_epi64((const __m128i*) x); in xnn_u8_clamp_ukernel__sse2_x64() local
54 vout = _mm_min_epu8(vout, voutput_max); in xnn_u8_clamp_ukernel__sse2_x64()
55 vout = _mm_max_epu8(vout, voutput_min); in xnn_u8_clamp_ukernel__sse2_x64()
57 *((uint32_t*) y) = (uint32_t) _mm_cvtsi128_si32(vout); in xnn_u8_clamp_ukernel__sse2_x64()
59 vout = _mm_srli_epi64(vout, 32); in xnn_u8_clamp_ukernel__sse2_x64()
62 *((uint16_t*) y) = (uint16_t) _mm_extract_epi16(vout, 0); in xnn_u8_clamp_ukernel__sse2_x64()
[all …]
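
For orientation, the u8-clamp hits above all share the same load / min / max / store pattern. Below is a minimal, self-contained sketch of that pattern (illustrative only, with simplified names and a multiple-of-8 element count assumed; this is not the XNNPACK source):

#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative sketch of the u8 clamp pattern shown by the hits above. */
static void u8_clamp_x8(size_t n, const uint8_t* x, uint8_t* y,
                        uint8_t output_min, uint8_t output_max) {
  const uint8x8_t voutput_min = vdup_n_u8(output_min);
  const uint8x8_t voutput_max = vdup_n_u8(output_max);
  for (; n >= 8; n -= 8) {
    uint8x8_t vout = vld1_u8(x); x += 8;   /* load 8 bytes */
    vout = vmin_u8(vout, voutput_max);     /* clamp to the upper bound */
    vout = vmax_u8(vout, voutput_min);     /* clamp to the lower bound */
    vst1_u8(y, vout); y += 8;              /* store 8 bytes */
  }
}

The real kernels additionally handle a remainder of fewer than 8 elements, which is what the partial-store hits (vst1_lane_u32 / vst1_lane_u16, _mm_cvtsi128_si32 / _mm_extract_epi16) correspond to.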
/external/XNNPACK/src/qs8-vaddc/
wasmsimd.c.in
51  …v128_t vout${ABC[N:N+8]} = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc${ABC[N:N+4]}, vacc…
55 … v128_t vout${ABC[N:N+16]} = wasm_i8x16_narrow_i16x8(vout${ABC[N:N+8]}, vout${ABC[N+8:N+16]});
57 …v128_t vout${ABC[N:N+8]}${ABC[N:N+8]} = wasm_i8x16_narrow_i16x8(vout${ABC[N:N+8]}, vout${ABC[N:N+8…
61 vout${ABC[N:N+16]} = wasm_i8x16_max(vout${ABC[N:N+16]}, voutput_min);
63  …vout${ABC[N:N+8]}${ABC[N:N+8]} = wasm_i8x16_max(vout${ABC[N:N+8]}${ABC[N:N+8]}, voutput_min);
67 vout${ABC[N:N+16]} = wasm_i8x16_min(vout${ABC[N:N+16]}, voutput_max);
69  …vout${ABC[N:N+8]}${ABC[N:N+8]} = wasm_i8x16_min(vout${ABC[N:N+8]}${ABC[N:N+8]}, voutput_max);
72 wasm_v128_store(output, vout${ABC[0:16]});
74 *((double*) output) = wasm_f64x2_extract_lane(vout${ABC[0:8]}${ABC[0:8]}, 0);
77 wasm_v128_store(output + ${N}, vout${ABC[N:N+16]});
[all …]
avx2-mul32-ld64.c.in
57  …__m256i vout${ABC[N:N+4]}${ABC[N+8:N+12]}${ABC[N+4:N+8]}${ABC[N+12:N+16]} = _mm256_adds_epi16(_mm2…
59 …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[N:N+8]…
61 …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[N:N+8]…
65  …vout${ABC[N:N+4]}${ABC[N+8:N+12]}${ABC[N+4:N+8]}${ABC[N+12:N+16]} = _mm256_min_epi16(_mm256_max_ep…
67  …vout${ABC[N:N+8]} = _mm_min_epi16(_mm_max_epi16(vout${ABC[N:N+8]}, _mm256_castsi256_si128(voutput_…
69  …vout${ABC[N:N+8]} = _mm_min_epi16(_mm_max_epi16(vout${ABC[N:N+8]}, voutput_min), voutput_max);
73  …vout${ABC[N:N+16]} = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout${ABC[N:N+4]}${A…
75 … __m128i vout${ABC[N:N+8]}${ABC[N:N+8]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N:N+8]});
78 _mm_storeu_si128((__m128i*) output, vout${ABC[0:16]});
80 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
[all …]
sse-mul32-ld32.c.in
65  …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[N:N+4]}, vacc${ABC[N+4:N+8]})…
68 vout${ABC[N:N+8]} = _mm_max_epi16(vout${ABC[N:N+8]}, voutput_min);
71 vout${ABC[N:N+8]} = _mm_min_epi16(vout${ABC[N:N+8]}, voutput_max);
75 const __m128i vout${ABC[N:N+16]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N+8:N+16]});
77 …const __m128i vout${ABC[N:N+8]}${ABC[N:N+8]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N:N+8]…
80 _mm_storeu_si128((__m128i*) output, vout${ABC[0:16]});
82 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
85 _mm_storeu_si128((__m128i*) (output + ${N}), vout${ABC[N:N+16]});
87 _mm_storel_epi64((__m128i*) (output + ${N}), vout${ABC[N:N+8]}${ABC[N:N+8]});
110 …__m128i vout${ABC[0:8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[0:4]}, vacc${ABC[4:8]}), voutpu…
[all …]
neon-ld64.c.in
60  …int8x16_t vout${ABC[N:N+16]} = vcombine_s8(vqmovn_s16(vacc${ABC[N:N+8]}), vqmovn_s16(vacc${ABC[N+8…
62 int8x8_t vout${ABC[N:N+8]} = vqmovn_s16(vacc${ABC[N:N+8]});
66 vout${ABC[N:N+16]} = vmaxq_s8(vout${ABC[N:N+16]}, voutput_min);
68 vout${ABC[N:N+8]} = vmax_s8(vout${ABC[N:N+8]}, vget_low_s8(voutput_min));
72 vout${ABC[N:N+16]} = vminq_s8(vout${ABC[N:N+16]}, voutput_max);
74 vout${ABC[N:N+8]} = vmin_s8(vout${ABC[N:N+8]}, vget_low_s8(voutput_max));
78 vst1q_s8(output, vout${ABC[N:N+16]}); output += 16;
80 vst1_s8(output, vout${ABC[N:N+8]}); output += 8;
102 int8x8_t vout${ABC[0:8]} = vqmovn_s16(vacc${ABC[0:8]});
103 vout${ABC[0:8]} = vmax_s8(vout${ABC[0:8]}, vget_low_s8(voutput_min));
[all …]
sse-mul16-ld64.c.in
73  …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[N:N+4]}, vacc${ABC[N+4:N+8]})…
76 vout${ABC[N:N+8]} = _mm_max_epi16(vout${ABC[N:N+8]}, voutput_min);
79 vout${ABC[N:N+8]} = _mm_min_epi16(vout${ABC[N:N+8]}, voutput_max);
83 const __m128i vout${ABC[N:N+16]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N+8:N+16]});
85 …const __m128i vout${ABC[N:N+8]}${ABC[N:N+8]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N:N+8]…
88 _mm_storeu_si128((__m128i*) output, vout${ABC[0:16]});
90 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
93 _mm_storeu_si128((__m128i*) (output + ${N}), vout${ABC[N:N+16]});
95 _mm_storel_epi64((__m128i*) (output + ${N}), vout${ABC[N:N+8]}${ABC[N:N+8]});
126 …__m128i vout${ABC[0:8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[0:4]}, vacc${ABC[4:8]}), voutpu…
[all …]
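
The qs8-vaddc templates above all end with the same tail: saturating-narrow the 16-bit accumulators to int8, clamp to the output range, and store. A minimal sketch of that step in plain NEON (simplified names, one 8-element column, 'vacc' assumed to already hold the requantized 16-bit values; not the XNNPACK source):

#include <arm_neon.h>
#include <stdint.h>

/* Illustrative sketch of the qs8 narrow-then-clamp tail the templates above expand to. */
static void qs8_narrow_clamp_store_x8(int16x8_t vacc, int8_t output_min, int8_t output_max,
                                      int8_t* output) {
  int8x8_t vout = vqmovn_s16(vacc);             /* saturating narrow int16 -> int8 */
  vout = vmax_s8(vout, vdup_n_s8(output_min));  /* clamp to the lower bound */
  vout = vmin_s8(vout, vdup_n_s8(output_max));  /* clamp to the upper bound */
  vst1_s8(output, vout);                        /* store 8 int8 outputs */
}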
/external/XNNPACK/src/qs8-vadd/
wasmsimd.c.in
57  …v128_t vout${ABC[N:N+8]} = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc${ABC[N:N+4]}, vacc…
61 … v128_t vout${ABC[N:N+16]} = wasm_i8x16_narrow_i16x8(vout${ABC[N:N+8]}, vout${ABC[N+8:N+16]});
63 …v128_t vout${ABC[N:N+8]}${ABC[N:N+8]} = wasm_i8x16_narrow_i16x8(vout${ABC[N:N+8]}, vout${ABC[N:N+8…
67 vout${ABC[N:N+16]} = wasm_i8x16_max(vout${ABC[N:N+16]}, voutput_min);
69  …vout${ABC[N:N+8]}${ABC[N:N+8]} = wasm_i8x16_max(vout${ABC[N:N+8]}${ABC[N:N+8]}, voutput_min);
73 vout${ABC[N:N+16]} = wasm_i8x16_min(vout${ABC[N:N+16]}, voutput_max);
75  …vout${ABC[N:N+8]}${ABC[N:N+8]} = wasm_i8x16_min(vout${ABC[N:N+8]}${ABC[N:N+8]}, voutput_max);
78 wasm_v128_store(output, vout${ABC[0:16]});
80 *((double*) output) = wasm_f64x2_extract_lane(vout${ABC[0:8]}${ABC[0:8]}, 0);
83 wasm_v128_store(output + ${N}, vout${ABC[N:N+16]});
[all …]
avx2-mul32-ld64.c.in
62  …__m256i vout${ABC[N:N+4]}${ABC[N+8:N+12]}${ABC[N+4:N+8]}${ABC[N+12:N+16]} = _mm256_adds_epi16(_mm2…
64 …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[N:N+8]…
66 …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[N:N+8]…
70  …vout${ABC[N:N+4]}${ABC[N+8:N+12]}${ABC[N+4:N+8]}${ABC[N+12:N+16]} = _mm256_min_epi16(_mm256_max_ep…
72  …vout${ABC[N:N+8]} = _mm_min_epi16(_mm_max_epi16(vout${ABC[N:N+8]}, _mm256_castsi256_si128(voutput_…
74  …vout${ABC[N:N+8]} = _mm_min_epi16(_mm_max_epi16(vout${ABC[N:N+8]}, voutput_min), voutput_max);
78  …vout${ABC[N:N+16]} = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout${ABC[N:N+4]}${A…
80 … __m128i vout${ABC[N:N+8]}${ABC[N:N+8]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N:N+8]});
83 _mm_storeu_si128((__m128i*) output, vout${ABC[0:16]});
85 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
[all …]
neon-ld64.c.in
61  …int8x16_t vout${ABC[N:N+16]} = vcombine_s8(vqmovn_s16(vacc${ABC[N:N+8]}), vqmovn_s16(vacc${ABC[N+8…
63 int8x8_t vout${ABC[N:N+8]} = vqmovn_s16(vacc${ABC[N:N+8]});
67 vout${ABC[N:N+16]} = vmaxq_s8(vout${ABC[N:N+16]}, voutput_min);
69 vout${ABC[N:N+8]} = vmax_s8(vout${ABC[N:N+8]}, vget_low_s8(voutput_min));
73 vout${ABC[N:N+16]} = vminq_s8(vout${ABC[N:N+16]}, voutput_max);
75 vout${ABC[N:N+8]} = vmin_s8(vout${ABC[N:N+8]}, vget_low_s8(voutput_max));
79 vst1q_s8(output, vout${ABC[N:N+16]}); output += 16;
81 vst1_s8(output, vout${ABC[N:N+8]}); output += 8;
109 int8x8_t vout${ABC[0:8]} = vqmovn_s16(vacc${ABC[0:8]});
110 vout${ABC[0:8]} = vmax_s8(vout${ABC[0:8]}, vget_low_s8(voutput_min));
[all …]
sse-mul32-ld32.c.in
72  …__m128i vout${ABC[N:N+8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[N:N+4]}, vacc${ABC[N+4:N+8]})…
75 vout${ABC[N:N+8]} = _mm_max_epi16(vout${ABC[N:N+8]}, voutput_min);
78 vout${ABC[N:N+8]} = _mm_min_epi16(vout${ABC[N:N+8]}, voutput_max);
82 const __m128i vout${ABC[N:N+16]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N+8:N+16]});
84 …const __m128i vout${ABC[N:N+8]}${ABC[N:N+8]} = _mm_packs_epi16(vout${ABC[N:N+8]}, vout${ABC[N:N+8]…
87 _mm_storeu_si128((__m128i*) output, vout${ABC[0:16]});
89 _mm_storel_epi64((__m128i*) output, vout${ABC[0:8]}${ABC[0:8]});
92 _mm_storeu_si128((__m128i*) (output + ${N}), vout${ABC[N:N+16]});
94 _mm_storel_epi64((__m128i*) (output + ${N}), vout${ABC[N:N+8]}${ABC[N:N+8]});
126 …__m128i vout${ABC[0:8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[0:4]}, vacc${ABC[4:8]}), voutpu…
[all …]
/external/XNNPACK/src/qs8-dwconv/
unipass-neon-mul16.c.in
82  …int8x16_t vout${ABC[C:C+16]} = vqmovn_high_s16(vqmovn_s16(vacc${ABC[C:C+8]}), vacc${ABC[C+8:C+16]}…
84 int8x8_t vout${ABC[C:C+8]} = vqmovn_s16(vacc${ABC[C:C+8]});
91 …int8x16_t vout${ABC[C:C+16]} = vcombine_s8(vqmovn_s16(vacc${ABC[C:C+8]}), vqmovn_s16(vacc${ABC[C+8…
93 int8x8_t vout${ABC[C:C+8]} = vqmovn_s16(vacc${ABC[C:C+8]});
98 vout${ABC[C:C+16]} = vmaxq_s8(vout${ABC[C:C+16]}, voutput_min);
101 vout${ABC[C:C+8]} = vmax_s8(vout${ABC[C:C+8]}, voutput_min);
103 vout${ABC[C:C+8]} = vmax_s8(vout${ABC[C:C+8]}, vget_low_s8(voutput_min));
107 vout${ABC[C:C+16]} = vminq_s8(vout${ABC[C:C+16]}, voutput_max);
110 vout${ABC[C:C+8]} = vmin_s8(vout${ABC[C:C+8]}, voutput_max);
112 vout${ABC[C:C+8]} = vmin_s8(vout${ABC[C:C+8]}, vget_low_s8(voutput_max));
[all …]
/external/XNNPACK/src/f32-gavgpool/
7x-minmax-wasmsimd-x86-c4.c
80  v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4() local
81 const v128_t vufmask = wasm_f32x4_lt(vout, vmin); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4()
82 const v128_t vofmask = wasm_f32x4_le(vmax, vout); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4()
83 vout = wasm_v128_bitselect(vmin, vout, vufmask); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4()
84 vout = wasm_v128_bitselect(vmax, vout, vofmask); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4()
86 wasm_v128_store(output, vout); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4()
109 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4() local
110 const v128_t vufmask = wasm_f32x4_lt(vout, vmin); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4()
111 const v128_t vofmask = wasm_f32x4_le(vmax, vout); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4()
112 vout = wasm_v128_bitselect(vmin, vout, vufmask); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_x86_c4()
[all …]
7x-minmax-sse-c4.c
80  __m128 vout = _mm_mul_ps(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4() local
81 vout = _mm_max_ps(vout, vmin); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4()
82 vout = _mm_min_ps(vout, vmax); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4()
84 _mm_storeu_ps(output, vout); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4()
107 __m128 vout = _mm_mul_ps(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4() local
108 vout = _mm_max_ps(vout, vmin); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4()
109 vout = _mm_min_ps(vout, vmax); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4()
112 _mm_storel_pi((__m64*) output, vout); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4()
113 vout = _mm_movehl_ps(vout, vout); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4()
117 _mm_store_ss(output, vout); in xnn_f32_gavgpool_minmax_ukernel_7x__sse_c4()
7x-minmax-wasmsimd-arm-c4.c
80  v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4() local
81 vout = wasm_f32x4_max(vout, vmin); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4()
82 vout = wasm_f32x4_min(vout, vmax); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4()
84 wasm_v128_store(output, vout); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4()
107 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4() local
108 vout = wasm_f32x4_max(vout, vmin); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4()
109 vout = wasm_f32x4_min(vout, vmax); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4()
112 *((double*) output) = wasm_f64x2_extract_lane(vout, 0); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4()
114 vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4()
117 *output++ = wasm_f32x4_extract_lane(vout, 0); in xnn_f32_gavgpool_minmax_ukernel_7x__wasmsimd_arm_c4()
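
In the f32-gavgpool hits above, vout is the pooled sum scaled by 1/(pooling elements) and then clamped to the output range (the WAsm-x86 variant emulates min/max with compare masks and bitselect). A minimal sketch of the common scale-then-clamp step with SSE intrinsics (simplified, one 4-element column; not the XNNPACK source):

#include <xmmintrin.h>

/* Illustrative sketch of the scale-and-clamp step shared by the f32-gavgpool hits above. */
static void f32_scale_clamp_store_x4(__m128 vsum, __m128 vscale, __m128 vmin, __m128 vmax,
                                     float* output) {
  __m128 vout = _mm_mul_ps(vsum, vscale);  /* average = sum * (1 / pooling elements) */
  vout = _mm_max_ps(vout, vmin);           /* clamp to the lower bound */
  vout = _mm_min_ps(vout, vmax);           /* clamp to the upper bound */
  _mm_storeu_ps(output, vout);             /* store 4 floats */
}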
/external/XNNPACK/src/qs8-igemm/
MRxNRc4-neondot.c.in
138  …int8x16_t vout${M}x${ABC[N:N+16]} = vqmovn_high_s16(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vacc${M}x$…
140 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vqmovn_high_s16(vqmovn_s16(vacc${M-1}x${AB…
142 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
151 …int8x16_t vout${M}x${ABC[N:N+16]} = vcombine_s8(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vqmovn_s16(vac…
153 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vcombine_s8(vqmovn_s16(vacc${M-1}x${ABC[N:…
155 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
167 vout${M}x${ABC[N:N+16]} = vmaxq_s8(vout${M}x${ABC[N:N+16]}, voutput_min);
169  …vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vmaxq_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}…
172 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, voutput_min);
174 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, vget_low_s8(voutput_min));
[all …]
c16-neon-mlal-padal.c.in
141  …int8x16_t vout${M}x${ABC[N:N+16]} = vqmovn_high_s16(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vacc${M}x$…
143 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vqmovn_high_s16(vqmovn_s16(vacc${M-1}x${AB…
145 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
154 …int8x16_t vout${M}x${ABC[N:N+16]} = vcombine_s8(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vqmovn_s16(vac…
156 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vcombine_s8(vqmovn_s16(vacc${M-1}x${ABC[N:…
158 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
170 vout${M}x${ABC[N:N+16]} = vmaxq_s8(vout${M}x${ABC[N:N+16]}, voutput_min);
172  …vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vmaxq_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}…
175 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, voutput_min);
177 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, vget_low_s8(voutput_min));
[all …]
c8-neon-mull-padal.c.in
159  …int8x16_t vout${M}x${ABC[N:N+16]} = vqmovn_high_s16(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vacc${M}x$…
161 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vqmovn_high_s16(vqmovn_s16(vacc${M-1}x${AB…
163 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
172 …int8x16_t vout${M}x${ABC[N:N+16]} = vcombine_s8(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vqmovn_s16(vac…
174 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vcombine_s8(vqmovn_s16(vacc${M-1}x${ABC[N:…
176 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
188 vout${M}x${ABC[N:N+16]} = vmaxq_s8(vout${M}x${ABC[N:N+16]}, voutput_min);
190  …vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vmaxq_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}…
193 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, voutput_min);
195 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, vget_low_s8(voutput_min));
[all …]
/external/XNNPACK/src/qs8-gemm/
MRxNRc4-neondot.c.in
131  …int8x16_t vout${M}x${ABC[N:N+16]} = vqmovn_high_s16(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vacc${M}x$…
133 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vqmovn_high_s16(vqmovn_s16(vacc${M-1}x${AB…
135 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
144 …int8x16_t vout${M}x${ABC[N:N+16]} = vcombine_s8(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vqmovn_s16(vac…
146 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vcombine_s8(vqmovn_s16(vacc${M-1}x${ABC[N:…
148 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
160 vout${M}x${ABC[N:N+16]} = vmaxq_s8(vout${M}x${ABC[N:N+16]}, voutput_min);
162  …vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vmaxq_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}…
165 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, voutput_min);
167 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, vget_low_s8(voutput_min));
[all …]
c16-neon-mlal-padal.c.in
130  …int8x16_t vout${M}x${ABC[N:N+16]} = vqmovn_high_s16(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vacc${M}x$…
132 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vqmovn_high_s16(vqmovn_s16(vacc${M-1}x${AB…
134 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
143 …int8x16_t vout${M}x${ABC[N:N+16]} = vcombine_s8(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vqmovn_s16(vac…
145 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vcombine_s8(vqmovn_s16(vacc${M-1}x${ABC[N:…
147 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
159 vout${M}x${ABC[N:N+16]} = vmaxq_s8(vout${M}x${ABC[N:N+16]}, voutput_min);
161  …vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vmaxq_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}…
164 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, voutput_min);
166 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, vget_low_s8(voutput_min));
[all …]
c8-neon-mull-padal.c.in
148  …int8x16_t vout${M}x${ABC[N:N+16]} = vqmovn_high_s16(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vacc${M}x$…
150 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vqmovn_high_s16(vqmovn_s16(vacc${M-1}x${AB…
152 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
161 …int8x16_t vout${M}x${ABC[N:N+16]} = vcombine_s8(vqmovn_s16(vacc${M}x${ABC[N:N+8]}), vqmovn_s16(vac…
163 …int8x16_t vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vcombine_s8(vqmovn_s16(vacc${M-1}x${ABC[N:…
165 int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
177 vout${M}x${ABC[N:N+16]} = vmaxq_s8(vout${M}x${ABC[N:N+16]}, voutput_min);
179  …vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]} = vmaxq_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}…
182 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, voutput_min);
184 vout${M}x${ABC[N:N+8]} = vmax_s8(vout${M}x${ABC[N:N+8]}, vget_low_s8(voutput_min));
[all …]
/external/XNNPACK/src/f32-avgpool/
9x-minmax-wasmsimd-x86-c4.c
136  v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4() local
137 const v128_t vufmask = wasm_f32x4_lt(vout, vmin); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
138 const v128_t vofmask = wasm_f32x4_le(vmax, vout); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
139 vout = wasm_v128_bitselect(vmin, vout, vufmask); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
140 vout = wasm_v128_bitselect(vmax, vout, vofmask); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
142 wasm_v128_store(output, vout); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
167 v128_t vout = wasm_f32x4_mul(vsum, vscale); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4() local
168 const v128_t vufmask = wasm_f32x4_lt(vout, vmin); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
169 const v128_t vofmask = wasm_f32x4_le(vmax, vout); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
170 vout = wasm_v128_bitselect(vmin, vout, vufmask); in xnn_f32_avgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
[all …]
/external/XNNPACK/src/f32-pavgpool/
9x-minmax-wasmsimd-x86-c4.c
139  v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4() local
140 const v128_t vufmask = wasm_f32x4_lt(vout, vmin); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
141 const v128_t vofmask = wasm_f32x4_le(vmax, vout); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
142 vout = wasm_v128_bitselect(vmin, vout, vufmask); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
143 vout = wasm_v128_bitselect(vmax, vout, vofmask); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
145 wasm_v128_store(output, vout); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
170 v128_t vout = wasm_f32x4_mul(vsum, vmultiplier); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4() local
171 const v128_t vufmask = wasm_f32x4_lt(vout, vmin); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
172 const v128_t vofmask = wasm_f32x4_le(vmax, vout); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
173 vout = wasm_v128_bitselect(vmin, vout, vufmask); in xnn_f32_pavgpool_minmax_ukernel_9x__wasmsimd_x86_c4()
[all …]
/external/XNNPACK/src/u8-maxpool/
9p8x-minmax-sse2-c16.c
100  const __m128i vout = _mm_max_epu8(_mm_min_epu8(vmax, voutput_max), voutput_min); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() local
102 _mm_storeu_si128((__m128i*) o, vout); o += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
123 __m128i vout = _mm_max_epu8(_mm_min_epu8(vmax, voutput_max), voutput_min); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() local
126 _mm_storel_epi64((__m128i*) o, vout); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
127 vout = _mm_unpackhi_epi64(vout, vout); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
131 *((uint32_t*) o) = (uint32_t) _mm_cvtsi128_si32(vout); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
132 vout = _mm_srli_epi64(vout, 32); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
136 *((uint16_t*) o) = (uint16_t) _mm_extract_epi16(vout, 0); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
137 vout = _mm_srli_epi32(vout, 16); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
141 *((uint8_t*) o) = (uint8_t) _mm_cvtsi128_si32(vout); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
[all …]
/external/XNNPACK/src/qu8-gavgpool/
7x-minmax-sse2-c8.c
120  __m128i vout = _mm_packs_epi32(vscaled_lo, vscaled_hi); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() local
121 vout = _mm_adds_epi16(vout, _mm_load_si128((const __m128i*) params->sse2.output_zero_point)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
122 vout = _mm_packus_epi16(vout, vout); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
123 vout = _mm_min_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_max)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
124 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
126 _mm_storel_epi64((__m128i*) output, vout); output += 8; in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
189 __m128i vout = _mm_packs_epi32(vscaled_lo, vscaled_hi); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() local
190 vout = _mm_adds_epi16(vout, _mm_load_si128((const __m128i*) params->sse2.output_zero_point)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
191 vout = _mm_packus_epi16(vout, vout); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
192 vout = _mm_min_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_max)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
[all …]
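
The qu8-gavgpool hits above show the usual quantized output tail: pack the scaled 32-bit values to 16 bits, add the output zero point, pack to uint8, clamp, and store. A minimal sketch of that sequence (simplified: the zero point and bounds are passed in directly instead of being loaded from params; not the XNNPACK source):

#include <emmintrin.h>
#include <stdint.h>

/* Illustrative sketch of the pack-and-clamp tail shown by the qu8-gavgpool hits above. */
static void qu8_pack_clamp_store_x8(__m128i vscaled_lo, __m128i vscaled_hi,
                                    __m128i voutput_zero_point,
                                    __m128i voutput_min, __m128i voutput_max,
                                    uint8_t* output) {
  __m128i vout = _mm_packs_epi32(vscaled_lo, vscaled_hi);  /* pack int32 -> int16 with saturation */
  vout = _mm_adds_epi16(vout, voutput_zero_point);         /* add the output zero point */
  vout = _mm_packus_epi16(vout, vout);                     /* pack int16 -> uint8 with saturation */
  vout = _mm_min_epu8(vout, voutput_max);                  /* clamp to the upper bound */
  vout = _mm_max_epu8(vout, voutput_min);                  /* clamp to the lower bound */
  _mm_storel_epi64((__m128i*) output, vout);               /* store the low 8 bytes */
}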
