Home
last modified time | relevance | path

Searched refs:vtl01234567 (Results 1 – 25 of 28) sorted by relevance

12

/external/XNNPACK/src/s8-ibilinear/gen/
Dsse2-c16.c52 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_s8_ibilinear_ukernel__sse2_c16() local
65 vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8); in xnn_s8_ibilinear_ukernel__sse2_c16()
75 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse2_c16()
76 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__sse2_c16()
77 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse2_c16()
117 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_s8_ibilinear_ukernel__sse2_c16() local
126 vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8); in xnn_s8_ibilinear_ukernel__sse2_c16()
132 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse2_c16()
133 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__sse2_c16()
134 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse2_c16()
[all …]
Dsse2-c8.c52 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_s8_ibilinear_ukernel__sse2_c8() local
61 vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8); in xnn_s8_ibilinear_ukernel__sse2_c8()
67 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse2_c8()
68 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__sse2_c8()
69 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse2_c8()
94 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_s8_ibilinear_ukernel__sse2_c8() local
99 vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8); in xnn_s8_ibilinear_ukernel__sse2_c8()
105 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse2_c8()
106 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__sse2_c8()
107 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse2_c8()
Dwasmsimd-mul32-c16.c45 const v128_t vtl01234567 = wasm_i16x8_load8x8(i0); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16() local
58 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16()
60 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16()
67 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),… in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16()
68 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)… in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16()
100 const v128_t vtl01234567 = wasm_i16x8_load8x8(i0); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16() local
109 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16()
111 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16()
114 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),… in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16()
115 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)… in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16()
[all …]
Dwasmsimd-mul32-c8.c45 const v128_t vtl01234567 = wasm_i16x8_load8x8(i0); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8() local
54 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8()
56 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8()
59 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),… in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8()
60 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)… in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8()
81 const v128_t vtl01234567 = wasm_i16x8_load8x8(i0); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8() local
86 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8()
88 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8()
91 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),… in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8()
92 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)… in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8()
Dwasmsimd-dot16x2-c16.c50 const v128_t vtl01234567 = wasm_i16x8_load8x8(i0); in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16() local
64 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
65 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
66 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, … in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
101 const v128_t vtl01234567 = wasm_i16x8_load8x8(i0); in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16() local
111 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
112 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
113 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, … in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
135 const v128_t vtl01234567 = wasm_i16x8_load8x8(i0); in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16() local
141 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
[all …]
Dsse41-c16.c51 const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_s8_ibilinear_ukernel__sse41_c16() local
66 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse41_c16()
67 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__sse41_c16()
68 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse41_c16()
103 const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_s8_ibilinear_ukernel__sse41_c16() local
114 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse41_c16()
115 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__sse41_c16()
116 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse41_c16()
138 const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_s8_ibilinear_ukernel__sse41_c16() local
145 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse41_c16()
[all …]
Dwasmsimd-dot16x2-c8.c50 const v128_t vtl01234567 = wasm_i16x8_load8x8(i0); in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8() local
60 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
61 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
62 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, … in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
84 const v128_t vtl01234567 = wasm_i16x8_load8x8(i0); in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8() local
90 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
91 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
92 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, … in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
Dsse41-c8.c51 const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_s8_ibilinear_ukernel__sse41_c8() local
62 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse41_c8()
63 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__sse41_c8()
64 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse41_c8()
86 const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_s8_ibilinear_ukernel__sse41_c8() local
93 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse41_c8()
94 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__sse41_c8()
95 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_s8_ibilinear_ukernel__sse41_c8()
Dneon-c16.c46 const int8x8_t vtl01234567 = vld1_s8(i0); i0 += 8; in xnn_s8_ibilinear_ukernel__neon_c16() local
55 const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c16()
57 const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c16()
58 const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c16()
109 const int8x8_t vtl01234567 = vld1_s8(i0); i0 += 8; in xnn_s8_ibilinear_ukernel__neon_c16() local
114 const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c16()
116 const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c16()
117 const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c16()
149 const int8x8_t vtl01234567 = vld1_s8(i0); in xnn_s8_ibilinear_ukernel__neon_c16() local
154 const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c16()
[all …]
Dneon-c8.c46 const int8x8_t vtl01234567 = vld1_s8(i0); i0 += 8; in xnn_s8_ibilinear_ukernel__neon_c8() local
51 const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c8()
53 const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c8()
54 const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c8()
86 const int8x8_t vtl01234567 = vld1_s8(i0); in xnn_s8_ibilinear_ukernel__neon_c8() local
91 const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c8()
93 const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c8()
94 const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567); in xnn_s8_ibilinear_ukernel__neon_c8()
/external/XNNPACK/src/u8-ibilinear/gen/
Dsse2-c16.c52 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_u8_ibilinear_ukernel__sse2_c16() local
66 vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero); in xnn_u8_ibilinear_ukernel__sse2_c16()
76 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse2_c16()
77 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__sse2_c16()
78 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse2_c16()
118 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_u8_ibilinear_ukernel__sse2_c16() local
128 vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero); in xnn_u8_ibilinear_ukernel__sse2_c16()
134 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse2_c16()
135 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__sse2_c16()
136 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse2_c16()
[all …]
Dsse2-c8.c52 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_u8_ibilinear_ukernel__sse2_c8() local
62 vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero); in xnn_u8_ibilinear_ukernel__sse2_c8()
68 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse2_c8()
69 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__sse2_c8()
70 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse2_c8()
95 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_u8_ibilinear_ukernel__sse2_c8() local
101 vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero); in xnn_u8_ibilinear_ukernel__sse2_c8()
107 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse2_c8()
108 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__sse2_c8()
109 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse2_c8()
Dwasmsimd-mul32-c16.c45 const v128_t vtl01234567 = wasm_u16x8_load8x8(i0); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16() local
58 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16()
60 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16()
67 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),… in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16()
68 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)… in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16()
100 const v128_t vtl01234567 = wasm_u16x8_load8x8(i0); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16() local
109 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16()
111 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16()
114 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),… in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16()
115 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)… in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16()
[all …]
Dwasmsimd-mul32-c8.c45 const v128_t vtl01234567 = wasm_u16x8_load8x8(i0); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8() local
54 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8()
56 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8()
59 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),… in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8()
60 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)… in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8()
81 const v128_t vtl01234567 = wasm_u16x8_load8x8(i0); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8() local
86 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8()
88 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8()
91 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),… in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8()
92 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)… in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8()
Dwasmsimd-dot16x2-c16.c50 const v128_t vtl01234567 = wasm_u16x8_load8x8(i0); in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16() local
64 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
65 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
66 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, … in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
101 const v128_t vtl01234567 = wasm_u16x8_load8x8(i0); in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16() local
111 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
112 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
113 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, … in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
135 const v128_t vtl01234567 = wasm_u16x8_load8x8(i0); in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16() local
141 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16()
[all …]
Dsse41-c16.c51 const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_u8_ibilinear_ukernel__sse41_c16() local
66 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse41_c16()
67 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__sse41_c16()
68 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse41_c16()
103 const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_u8_ibilinear_ukernel__sse41_c16() local
114 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse41_c16()
115 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__sse41_c16()
116 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse41_c16()
138 const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_u8_ibilinear_ukernel__sse41_c16() local
145 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse41_c16()
[all …]
Dwasmsimd-dot16x2-c8.c50 const v128_t vtl01234567 = wasm_u16x8_load8x8(i0); in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8() local
60 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
61 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
62 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, … in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
84 const v128_t vtl01234567 = wasm_u16x8_load8x8(i0); in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8() local
90 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9… in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
91 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
92 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, … in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8()
Dsse41-c8.c51 const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_u8_ibilinear_ukernel__sse41_c8() local
62 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse41_c8()
63 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__sse41_c8()
64 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse41_c8()
86 const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_u8_ibilinear_ukernel__sse41_c8() local
93 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse41_c8()
94 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567); in xnn_u8_ibilinear_ukernel__sse41_c8()
95 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah); in xnn_u8_ibilinear_ukernel__sse41_c8()
Dneon-c16.c46 const uint8x8_t vtl01234567 = vld1_u8(i0); i0 += 8; in xnn_u8_ibilinear_ukernel__neon_c16() local
55 const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c16()
57 const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c16()
58 const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c16()
109 const uint8x8_t vtl01234567 = vld1_u8(i0); i0 += 8; in xnn_u8_ibilinear_ukernel__neon_c16() local
114 const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c16()
116 const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c16()
117 const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c16()
149 const uint8x8_t vtl01234567 = vld1_u8(i0); in xnn_u8_ibilinear_ukernel__neon_c16() local
154 const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c16()
[all …]
Dneon-c8.c46 const uint8x8_t vtl01234567 = vld1_u8(i0); i0 += 8; in xnn_u8_ibilinear_ukernel__neon_c8() local
51 const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c8()
53 const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c8()
54 const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c8()
86 const uint8x8_t vtl01234567 = vld1_u8(i0); in xnn_u8_ibilinear_ukernel__neon_c8() local
91 const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c8()
93 const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c8()
94 const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567)); in xnn_u8_ibilinear_ukernel__neon_c8()
/external/XNNPACK/src/s8-ibilinear/
Dsse.c.in164 const __m128i vtl01234567 = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i0));
173 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);
185 vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero);
190 vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8);
196 const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
197 const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
198 const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
228 const __m128i vtl01234567 = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i0));
233 __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);
241 vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero);
[all …]
Dneon.c.in118 const ${XINT8X8_T} vtl01234567 = ${VLD1_X8}(i0); i0 += 8;
124 const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567);
126 const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567);
127 const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567);
129 const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567));
131 const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567));
132 const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567));
167 const ${XINT8X8_T} vtl01234567 = ${VLD1_X8}(i0);
173 const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567);
175 const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567);
[all …]
Dwasmsimd-mul32.c.in104 const v128_t vtl01234567 = ${WASM_X16X8_LOAD_8X8}(i0);
113 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
115 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
118 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
119 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
140 const v128_t vtl01234567 = ${WASM_X16X8_LOAD_8X8}(i0);
145 const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
147 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
150 …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
151 …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
Dwasmsimd-dot16x2.c.in107 const v128_t vtl01234567 = ${WASM_X16X8_LOAD_8X8}(i0);
117 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
118 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
119 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
141 const v128_t vtl01234567 = ${WASM_X16X8_LOAD_8X8}(i0);
147 …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
148 const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
149 …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
/external/XNNPACK/src/f16-ibilinear-chw/gen/
Dneonfp16arith-p8.c91 const float16x8_t vtl01234567 = vtl_t01234567.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8() local
94 const float16x8_t vl01234567 = vfmaq_f16(vtl01234567, vld01234567, valphav01234567); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8()

12