/external/XNNPACK/src/s8-ibilinear/gen/
D | sse2-c16.c | matches in xnn_s8_ibilinear_ukernel__sse2_c16():
     52:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);  (local)
     65:  vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8);
     75:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     76:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     77:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
    117:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);  (local)
    126:  vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8);
    132:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
    133:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
    134:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
    [all …]
D | sse2-c8.c | matches in xnn_s8_ibilinear_ukernel__sse2_c8():
     52:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);  (local)
     61:  vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8);
     67:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     68:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     69:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
     94:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);  (local)
     99:  vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8);
    105:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
    106:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
    107:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
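Both SSE2 kernels above widen the eight signed 8-bit pixels without SSE4.1's sign-extending move: interleaving the register with itself and arithmetic-shifting each 16-bit lane right by 8 reproduces the sign extension (lines 65/126 in sse2-c16.c, 61/99 in sse2-c8.c). A minimal standalone sketch of that idiom; the buffer and the printing are illustrative, not from XNNPACK:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdio.h>

    int main(void) {
      const signed char pixels[8] = {-128, -1, 0, 1, 2, 100, -100, 127};
      /* Load 8 bytes into the low half of an XMM register. */
      __m128i v = _mm_loadl_epi64((const __m128i*) pixels);
      /* Duplicate each byte into both halves of a 16-bit lane, then shift
         arithmetically right by 8: the sign bit fills the high byte, which
         sign-extends int8 -> int16 without SSE4.1's _mm_cvtepi8_epi16. */
      v = _mm_srai_epi16(_mm_unpacklo_epi8(v, v), 8);
      short out[8];
      _mm_storeu_si128((__m128i*) out, v);
      for (int i = 0; i < 8; i++) {
        printf("%d -> %d\n", pixels[i], out[i]);  /* every value survives */
      }
      return 0;
    }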
D | wasmsimd-mul32-c16.c | matches in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16():
     45:  const v128_t vtl01234567 = wasm_i16x8_load8x8(i0);  (local)
     58:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
     60:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     67:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
     68:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
    100:  const v128_t vtl01234567 = wasm_i16x8_load8x8(i0);  (local)
    109:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
    111:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
    114:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
    115:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
    [all …]
D | wasmsimd-mul32-c8.c | matches in xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8():
     45:  const v128_t vtl01234567 = wasm_i16x8_load8x8(i0);  (local)
     54:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
     56:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     59:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
     60:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
     81:  const v128_t vtl01234567 = wasm_i16x8_load8x8(i0);  (local)
     86:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
     88:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     91:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
     92:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
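The mul32 variants keep the interpolation in 32-bit integer math: the visible fragments form the top row as (vtl << 11) plus the widened difference times the horizontal weight, i.e. a linear interpolation with 11 fractional weight bits. A scalar sketch of that fixed-point scheme, assuming Q11 weights (0 <= alpha < 2048) and a simple round-to-nearest on the final Q22 accumulator; the rounding step is not visible in the fragments above, so treat it as an assumption:

    #include <stdint.h>
    #include <stdio.h>

    /* One output sample, mirroring the (vtl << 11) + vtd * alphah pattern
       visible above. tl/tr/bl/br are the four corner pixels; alpha_h and
       alpha_v are Q11 horizontal/vertical weights (assumed layout). */
    static int8_t ibilinear_q11(int8_t tl, int8_t tr, int8_t bl, int8_t br,
                                int32_t alpha_h, int32_t alpha_v) {
      const int32_t t = ((int32_t) tl << 11) + (tr - tl) * alpha_h;  /* top lerp, Q11 */
      const int32_t b = ((int32_t) bl << 11) + (br - bl) * alpha_h;  /* bottom lerp, Q11 */
      const int32_t acc = (t << 11) + (b - t) * alpha_v;             /* vertical lerp, Q22 */
      return (int8_t) ((acc + (1 << 21)) >> 22);                     /* round to nearest */
    }

    int main(void) {
      /* Halfway between all four corners: expect the average of 0,10,20,30. */
      printf("%d\n", ibilinear_q11(0, 10, 20, 30, 1024, 1024));  /* prints 15 */
      return 0;
    }

Because each lerp stays inside the range of its endpoints, the Q22 accumulator fits comfortably in int32 for 8-bit inputs.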
D | wasmsimd-dot16x2-c16.c | matches in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16():
     50:  const v128_t vtl01234567 = wasm_i16x8_load8x8(i0);  (local)
     64:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
     65:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     66:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
    101:  const v128_t vtl01234567 = wasm_i16x8_load8x8(i0);  (local)
    111:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
    112:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
    113:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
    135:  const v128_t vtl01234567 = wasm_i16x8_load8x8(i0);  (local)
    141:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
    [all …]
D | sse41-c16.c | matches in xnn_s8_ibilinear_ukernel__sse41_c16():
     51:  const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
     66:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     67:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     68:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
    103:  const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
    114:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
    115:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
    116:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
    138:  const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
    145:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
    [all …]
D | wasmsimd-dot16x2-c8.c | matches in xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8():
     50:  const v128_t vtl01234567 = wasm_i16x8_load8x8(i0);  (local)
     60:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
     61:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     62:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
     84:  const v128_t vtl01234567 = wasm_i16x8_load8x8(i0);  (local)
     90:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
     91:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     92:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
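The dot16x2 variants fuse the multiply and the pairwise add: wasm_v16x8_shuffle with indices 0, 8, 1, 9, … interleaves matching lanes of vtr01234567 and vtl01234567, and wasm_i32x4_dot_i16x8 then yields tr*w0 + tl*w1 per 32-bit lane, the same role _mm_madd_epi16 plays in the sse41 kernels. A sketch of one such step; the assumption that valphah holds interleaved Q11 weight pairs (alpha, 2048 - alpha) is an inference from the generated code, not a quote of it:

    /* Build with a wasm toolchain, e.g. clang --target=wasm32 -msimd128. */
    #include <wasm_simd128.h>

    /* Lower four outputs of the horizontal lerp in one dot product.
       valphah is assumed to hold Q11 pairs (alpha_i, 2048 - alpha_i). */
    static inline v128_t hlerp_lo(v128_t vtr, v128_t vtl, v128_t valphah) {
      /* Interleave the low four 16-bit lanes: tr0,tl0,tr1,tl1,tr2,tl2,tr3,tl3. */
      const v128_t pairs = wasm_v16x8_shuffle(vtr, vtl, 0, 8, 1, 9, 2, 10, 3, 11);
      /* Each i32 lane becomes tr_i*alpha_i + tl_i*(2048 - alpha_i). */
      return wasm_i32x4_dot_i16x8(pairs, valphah);
    }
    /* Usage, matching the listings: vt0123 = hlerp_lo(vtr01234567, vtl01234567, valphah); */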
D | sse41-c8.c | matches in xnn_s8_ibilinear_ukernel__sse41_c8():
     51:  const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
     62:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     63:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     64:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
     86:  const __m128i vtl01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
     93:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     94:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     95:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
D | neon-c16.c | matches in xnn_s8_ibilinear_ukernel__neon_c16():
     46:  const int8x8_t vtl01234567 = vld1_s8(i0); i0 += 8;  (local)
     55:  const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567);
     57:  const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567);
     58:  const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567);
    109:  const int8x8_t vtl01234567 = vld1_s8(i0); i0 += 8;  (local)
    114:  const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567);
    116:  const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567);
    117:  const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567);
    149:  const int8x8_t vtl01234567 = vld1_s8(i0);  (local)
    154:  const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567);
    [all …]
D | neon-c8.c | matches in xnn_s8_ibilinear_ukernel__neon_c8():
     46:  const int8x8_t vtl01234567 = vld1_s8(i0); i0 += 8;  (local)
     51:  const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567);
     53:  const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567);
     54:  const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567);
     86:  const int8x8_t vtl01234567 = vld1_s8(i0);  (local)
     91:  const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567);
     93:  const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567);
     94:  const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567);
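The NEON kernels lean on widening arithmetic: vsubl_s8 subtracts two int8x8_t vectors and produces an int16x8_t in one instruction, which matters because an int8 difference needs nine bits, and vmovl_s8 widens the top-left row for the shifted term. A standalone sketch; the buffers and printing are illustrative (build with -mfpu=neon on AArch32; NEON is baseline on AArch64):

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const int8_t top_left[8]  = {0, 1, 2, 3, 4, 5, 6, 7};
      const int8_t top_right[8] = {7, 6, 5, 4, 3, 2, 1, 0};
      const int8x8_t vtl = vld1_s8(top_left);
      const int8x8_t vtr = vld1_s8(top_right);
      /* Subtract-and-widen in one instruction: int8x8 - int8x8 -> int16x8.
         Widening first (or fusing it, as here) avoids 8-bit overflow. */
      const int16x8_t vtd = vsubl_s8(vtr, vtl);
      int16_t out[8];
      vst1q_s16(out, vtd);
      for (int i = 0; i < 8; i++) printf("%d ", out[i]);
      printf("\n");  /* 7 5 3 1 -1 -3 -5 -7 */
      return 0;
    }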
/external/XNNPACK/src/u8-ibilinear/gen/
D | sse2-c16.c | matches in xnn_u8_ibilinear_ukernel__sse2_c16():
     52:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);  (local)
     66:  vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero);
     76:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     77:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     78:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
    118:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);  (local)
    128:  vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero);
    134:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
    135:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
    136:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
    [all …]
D | sse2-c8.c | matches in xnn_u8_ibilinear_ukernel__sse2_c8():
     52:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);  (local)
     62:  vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero);
     68:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     69:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     70:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
     95:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);  (local)
    101:  vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero);
    107:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
    108:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
    109:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
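For unsigned pixels the SSE2 widening step is simpler than the signed trick in the s8 kernels: interleaving with an all-zero register zero-extends u8 to u16 (lines 66/128 above, 62/101 here). A minimal sketch, with illustrative data:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdio.h>

    int main(void) {
      const unsigned char pixels[8] = {0, 1, 127, 128, 200, 254, 255, 42};
      __m128i v = _mm_loadl_epi64((const __m128i*) pixels);
      /* Interleaving with zero puts 0x00 in every high byte, which
         zero-extends uint8 -> uint16 (the unsigned counterpart of the
         unpack+srai idiom used by the s8 SSE2 kernels). */
      const __m128i vzero = _mm_setzero_si128();
      v = _mm_unpacklo_epi8(v, vzero);
      unsigned short out[8];
      _mm_storeu_si128((__m128i*) out, v);
      for (int i = 0; i < 8; i++) printf("%u ", out[i]);
      printf("\n");
      return 0;
    }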
D | wasmsimd-mul32-c16.c | matches in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16():
     45:  const v128_t vtl01234567 = wasm_u16x8_load8x8(i0);  (local)
     58:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
     60:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     67:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
     68:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
    100:  const v128_t vtl01234567 = wasm_u16x8_load8x8(i0);  (local)
    109:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
    111:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
    114:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
    115:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
    [all …]
D | wasmsimd-mul32-c8.c | matches in xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8():
     45:  const v128_t vtl01234567 = wasm_u16x8_load8x8(i0);  (local)
     54:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
     56:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     59:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
     60:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
     81:  const v128_t vtl01234567 = wasm_u16x8_load8x8(i0);  (local)
     86:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
     88:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     91:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
     92:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
D | wasmsimd-dot16x2-c16.c | matches in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16():
     50:  const v128_t vtl01234567 = wasm_u16x8_load8x8(i0);  (local)
     64:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
     65:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     66:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
    101:  const v128_t vtl01234567 = wasm_u16x8_load8x8(i0);  (local)
    111:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
    112:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
    113:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
    135:  const v128_t vtl01234567 = wasm_u16x8_load8x8(i0);  (local)
    141:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
    [all …]
D | sse41-c16.c | matches in xnn_u8_ibilinear_ukernel__sse41_c16():
     51:  const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
     66:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     67:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     68:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
    103:  const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
    114:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
    115:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
    116:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
    138:  const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
    145:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
    [all …]
D | wasmsimd-dot16x2-c8.c | matches in xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8():
     50:  const v128_t vtl01234567 = wasm_u16x8_load8x8(i0);  (local)
     60:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
     61:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     62:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
     84:  const v128_t vtl01234567 = wasm_u16x8_load8x8(i0);  (local)
     90:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
     91:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
     92:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
D | sse41-c8.c | matches in xnn_u8_ibilinear_ukernel__sse41_c8():
     51:  const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
     62:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     63:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     64:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
     86:  const __m128i vtl01234567 = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i*) i0));  (local)
     93:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
     94:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
     95:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
D | neon-c16.c | matches in xnn_u8_ibilinear_ukernel__neon_c16():
     46:  const uint8x8_t vtl01234567 = vld1_u8(i0); i0 += 8;  (local)
     55:  const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567));
     57:  const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567));
     58:  const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567));
    109:  const uint8x8_t vtl01234567 = vld1_u8(i0); i0 += 8;  (local)
    114:  const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567));
    116:  const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567));
    117:  const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567));
    149:  const uint8x8_t vtl01234567 = vld1_u8(i0);  (local)
    154:  const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567));
    [all …]
D | neon-c8.c | matches in xnn_u8_ibilinear_ukernel__neon_c8():
     46:  const uint8x8_t vtl01234567 = vld1_u8(i0); i0 += 8;  (local)
     51:  const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567));
     53:  const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567));
     54:  const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567));
     86:  const uint8x8_t vtl01234567 = vld1_u8(i0);  (local)
     91:  const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567));
     93:  const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567));
     94:  const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567));
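The u8 NEON kernels widen with vsubl_u8/vmovl_u8 and immediately reinterpret the results as signed, so the remaining 16-bit math is shared with the s8 path. The reinterpretation is lossless because a difference of two uint8 values lies in [-255, 255] and a widened uint8 in [0, 255], both representable in int16. A small demonstration with illustrative data:

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const uint8_t top[8]    = {0, 10, 200, 255, 1, 2, 3, 4};
      const uint8_t bottom[8] = {255, 0, 100, 0, 1, 2, 3, 4};
      const uint8x8_t vt = vld1_u8(top);
      const uint8x8_t vb = vld1_u8(bottom);
      /* vsubl_u8 zero-extends both operands and subtracts; a negative
         result wraps modulo 2^16, so reinterpreting the uint16x8_t as
         int16x8_t recovers the true signed difference. */
      const int16x8_t vd = vreinterpretq_s16_u16(vsubl_u8(vb, vt));
      int16_t out[8];
      vst1q_s16(out, vd);
      for (int i = 0; i < 8; i++) printf("%d ", out[i]);
      printf("\n");  /* 255 -10 -100 -255 0 0 0 0 */
      return 0;
    }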
/external/XNNPACK/src/s8-ibilinear/
D | sse.c.in:
    164:  const __m128i vtl01234567 = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i0));
    173:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);
    185:  vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero);
    190:  vtl01234567 = _mm_srai_epi16(_mm_unpacklo_epi8(vtl01234567, vtl01234567), 8);
    196:  const __m128i vt0123 = _mm_madd_epi16(_mm_unpacklo_epi16(vtr01234567, vtl01234567), valphah);
    197:  const __m128i vdl01234567 = _mm_sub_epi16(vbl01234567, vtl01234567);
    198:  const __m128i vt4567 = _mm_madd_epi16(_mm_unpackhi_epi16(vtr01234567, vtl01234567), valphah);
    228:  const __m128i vtl01234567 = ${_MM_CVTEPX8_EPI16}(_mm_loadl_epi64((const __m128i*) i0));
    233:  __m128i vtl01234567 = _mm_loadl_epi64((const __m128i*) i0);
    241:  vtl01234567 = _mm_unpacklo_epi8(vtl01234567, vzero);
    [all …]
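sse.c.in is the template behind the generated SSE files in both gen/ directories above: judging from the generated output, ${_MM_CVTEPX8_EPI16} expands to _mm_cvtepi8_epi16 for s8 and _mm_cvtepu8_epi16 for u8 (compare the two sse41-c16.c listings), while lines 185 and 190 are the SSE2 fallbacks for the two signednesses. A plain-C sketch of the same dispatch; the DATATYPE_IS_SIGNED macro stands in for the template's datatype switch and is illustrative, not XNNPACK's:

    #include <emmintrin.h>
    #if defined(__SSE4_1__)
    #include <smmintrin.h>
    #endif

    #define DATATYPE_IS_SIGNED 1  /* 1 for the s8 kernels, 0 for u8 */

    /* Widen 8 pixels from the low 8 bytes of v to 16-bit lanes, choosing
       the instruction the way the template's branches do. */
    static inline __m128i widen8(__m128i v) {
    #if defined(__SSE4_1__)
      #if DATATYPE_IS_SIGNED
        return _mm_cvtepi8_epi16(v);   /* one sign-extending instruction */
      #else
        return _mm_cvtepu8_epi16(v);   /* one zero-extending instruction */
      #endif
    #else
      #if DATATYPE_IS_SIGNED
        return _mm_srai_epi16(_mm_unpacklo_epi8(v, v), 8);   /* SSE2 fallback */
      #else
        return _mm_unpacklo_epi8(v, _mm_setzero_si128());    /* SSE2 fallback */
      #endif
    #endif
    }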
D | neon.c.in:
    118:  const ${XINT8X8_T} vtl01234567 = ${VLD1_X8}(i0); i0 += 8;
    124:  const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567);
    126:  const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567);
    127:  const int16x8_t vxtl01234567 = vmovl_s8(vtl01234567);
    129:  const int16x8_t vtd01234567 = vreinterpretq_s16_u16(vsubl_u8(vtr01234567, vtl01234567));
    131:  const int16x8_t vdl01234567 = vreinterpretq_s16_u16(vsubl_u8(vbl01234567, vtl01234567));
    132:  const int16x8_t vxtl01234567 = vreinterpretq_s16_u16(vmovl_u8(vtl01234567));
    167:  const ${XINT8X8_T} vtl01234567 = ${VLD1_X8}(i0);
    173:  const int16x8_t vtd01234567 = vsubl_s8(vtr01234567, vtl01234567);
    175:  const int16x8_t vdl01234567 = vsubl_s8(vbl01234567, vtl01234567);
    [all …]
D | wasmsimd-mul32.c.in:
    104:  const v128_t vtl01234567 = ${WASM_X16X8_LOAD_8X8}(i0);
    113:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
    115:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
    118:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
    119:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
    140:  const v128_t vtl01234567 = ${WASM_X16X8_LOAD_8X8}(i0);
    145:  const v128_t vtd01234567 = wasm_i16x8_sub(vtr01234567, vtl01234567);
    147:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
    150:  …const v128_t vt0123 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_low_i16x8(vtl01234567), 11),…
    151:  …const v128_t vt4567 = wasm_i32x4_add(wasm_i32x4_shl(wasm_i32x4_extend_high_i16x8(vtl01234567), 11)…
D | wasmsimd-dot16x2.c.in:
    107:  const v128_t vtl01234567 = ${WASM_X16X8_LOAD_8X8}(i0);
    117:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
    118:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
    119:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
    141:  const v128_t vtl01234567 = ${WASM_X16X8_LOAD_8X8}(i0);
    147:  …const v128_t vt0123 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 0, 8, 1, 9…
    148:  const v128_t vdl01234567 = wasm_i16x8_sub(vbl01234567, vtl01234567);
    149:  …const v128_t vt4567 = wasm_i32x4_dot_i16x8(wasm_v16x8_shuffle(vtr01234567, vtl01234567, 4, 12, 5, …
/external/XNNPACK/src/f16-ibilinear-chw/gen/
D | neonfp16arith-p8.c | matches in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8():
     91:  const float16x8_t vtl01234567 = vtl_t01234567.val[0];  (local)
     94:  const float16x8_t vl01234567 = vfmaq_f16(vtl01234567, vld01234567, valphav01234567);
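The fp16 channelwise kernel keeps the vertical interpolation as one fused multiply-add per lane: vfmaq_f16(vtl01234567, vld01234567, valphav01234567) evaluates vtl + vld * alphav, where vld01234567 presumably holds the bottom-minus-top differences (an inference from the naming, not confirmed by the fragment). A scalar reference of that step, with float standing in for float16:

    #include <stdio.h>

    /* One lane of the vertical step: l = tl + (bl - tl) * alpha_v, which
       is what vfmaq_f16(vtl, vld, valphav) computes per lane, assuming
       vld holds the bottom-minus-top differences. */
    static float vlerp(float tl, float bl, float alpha_v) {
      const float ld = bl - tl;         /* the difference the kernel precomputes */
      return tl + ld * alpha_v;         /* fused on hardware via vfmaq_f16 */
    }

    int main(void) {
      printf("%f\n", vlerp(1.0f, 3.0f, 0.25f));  /* 1.5 */
      return 0;
    }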