/external/XNNPACK/src/x24-transposec/ |
D | 4x4-ssse3.c | 85 const __m128i v3_0 = _mm_unpacklo_epi8(v1_0, v1_2); in xnn_x24_transposec_ukernel__4x4_ssse3() local 133 const __m128i v3_0 = _mm_unpacklo_epi8(v1_0, v1_2); in xnn_x24_transposec_ukernel__4x4_ssse3() local
|
/external/XNNPACK/src/x16-transposec/gen/ |
D | 8x8-reuse-dec-zip-neon.c | 46 const uint16x8_t v3_0 = vld1q_u16(i0); i0 = (uint16_t*) ((uintptr_t) i0 + input_stride); in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon() local 103 const uint16x8_t v3_0 = vld1q_u16(i0); in xnn_x16_transposec_ukernel__8x8_reuse_dec_zip_neon() local
|
D | 8x8-multi-dec-zip-neon.c | 54 const uint16x8_t v3_0 = vld1q_u16(i0); i0 = (uint16_t*) ((uintptr_t) i0 + input_offset); in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon() local 111 const uint16x8_t v3_0 = vld1q_u16(i0); in xnn_x16_transposec_ukernel__8x8_multi_dec_zip_neon() local
|
D | 8x8-reuse-switch-zip-neon.c | 45 const uint16x8_t v3_0 = vld1q_u16(i0); i0 = (uint16_t*) ((uintptr_t) i0 + input_stride); in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon() local 93 const uint16x8_t v3_0 = vld1q_u16(i0); in xnn_x16_transposec_ukernel__8x8_reuse_switch_zip_neon() local
|
D | 8x8-reuse-mov-zip-neon.c | 46 const uint16x8_t v3_0 = vld1q_u16(i0); i0 = (uint16_t*) ((uintptr_t) i0 + input_stride); in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon() local 110 const uint16x8_t v3_0 = vld1q_u16(i0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_zip_neon() local
|
D | 8x8-reuse-multi-zip-neon.c | 70 const uint16x8_t v3_0 = vld1q_u16(i0); i0 = (uint16_t*) ((uintptr_t) i0 + input_stride); in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon() local 104 const uint16x8_t v3_0 = vld1q_u16(i0); in xnn_x16_transposec_ukernel__8x8_reuse_multi_zip_neon() local
|
D | 8x8-multi-switch-zip-neon.c | 53 const uint16x8_t v3_0 = vld1q_u16(i0); i0 = (uint16_t*) ((uintptr_t) i0 + input_offset); in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon() local 101 const uint16x8_t v3_0 = vld1q_u16(i0); in xnn_x16_transposec_ukernel__8x8_multi_switch_zip_neon() local
|
D | 8x8-multi-mov-zip-neon.c | 54 const uint16x8_t v3_0 = vld1q_u16(i0); i0 = (uint16_t*) ((uintptr_t) i0 + input_offset); in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon() local 118 const uint16x8_t v3_0 = vld1q_u16(i0); in xnn_x16_transposec_ukernel__8x8_multi_mov_zip_neon() local
|
D | 8x8-reuse-mov-wasmsimd.c | 46 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd() local 129 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_wasmsimd() local
|
D | 8x8-multi-switch-wasmsimd.c | 53 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd() local 127 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_multi_switch_wasmsimd() local
|
D | 8x8-reuse-multi-sse2.c | 72 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2() local 135 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_reuse_multi_sse2() local
|
D | 8x8-reuse-switch-sse2.c | 47 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2() local 123 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_reuse_switch_sse2() local
|
D | 8x8-reuse-multi-wasmsimd.c | 70 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd() local 131 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_reuse_multi_wasmsimd() local
|
D | 8x8-reuse-switch-wasmsimd.c | 45 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_reuse_switch_wasmsimd() local 119 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_reuse_switch_wasmsimd() local
|
D | 8x8-multi-switch-sse2.c | 55 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_multi_switch_sse2() local 131 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_multi_switch_sse2() local
|
D | 8x8-reuse-mov-sse2.c | 48 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2() local 133 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_reuse_mov_sse2() local
|
D | 8x8-multi-mov-sse2.c | 56 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_multi_mov_sse2() local 141 const __m128i v3_0 = _mm_loadu_si128((const __m128i*) i0); in xnn_x16_transposec_ukernel__8x8_multi_mov_sse2() local
|
D | 8x8-multi-mov-wasmsimd.c | 54 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd() local 137 const v128_t v3_0 = wasm_v128_load(i0); in xnn_x16_transposec_ukernel__8x8_multi_mov_wasmsimd() local
|
/external/XNNPACK/src/x8-transposec/gen/ |
D | 8x8-reuse-dec-zip-neon.c | 46 const uint8x8_t v3_0 = vld1_u8(i0); i0 = (uint8_t*) ((uintptr_t) i0 + input_stride); in xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon() local 103 const uint8x8_t v3_0 = vld1_u8(i0); in xnn_x8_transposec_ukernel__8x8_reuse_dec_zip_neon() local
|
D | 8x8-multi-switch-zip-neon.c | 53 const uint8x8_t v3_0 = vld1_u8(i0); i0 = (uint8_t*) ((uintptr_t) i0 + input_offset); in xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon() local 101 const uint8x8_t v3_0 = vld1_u8(i0); in xnn_x8_transposec_ukernel__8x8_multi_switch_zip_neon() local
|
D | 8x8-multi-dec-zip-neon.c | 54 const uint8x8_t v3_0 = vld1_u8(i0); i0 = (uint8_t*) ((uintptr_t) i0 + input_offset); in xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon() local 111 const uint8x8_t v3_0 = vld1_u8(i0); in xnn_x8_transposec_ukernel__8x8_multi_dec_zip_neon() local
|
D | 8x8-reuse-switch-zip-neon.c | 45 const uint8x8_t v3_0 = vld1_u8(i0); i0 = (uint8_t*) ((uintptr_t) i0 + input_stride); in xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon() local 93 const uint8x8_t v3_0 = vld1_u8(i0); in xnn_x8_transposec_ukernel__8x8_reuse_switch_zip_neon() local
|
D | 8x8-reuse-mov-zip-neon.c | 46 const uint8x8_t v3_0 = vld1_u8(i0); i0 = (uint8_t*) ((uintptr_t) i0 + input_stride); in xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon() local 110 const uint8x8_t v3_0 = vld1_u8(i0); in xnn_x8_transposec_ukernel__8x8_reuse_mov_zip_neon() local
|
D | 8x8-reuse-multi-zip-neon.c | 70 const uint8x8_t v3_0 = vld1_u8(i0); i0 = (uint8_t*) ((uintptr_t) i0 + input_stride); in xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon() local 104 const uint8x8_t v3_0 = vld1_u8(i0); in xnn_x8_transposec_ukernel__8x8_reuse_multi_zip_neon() local
|
D | 8x8-multi-mov-zip-neon.c | 54 const uint8x8_t v3_0 = vld1_u8(i0); i0 = (uint8_t*) ((uintptr_t) i0 + input_offset); in xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon() local 118 const uint8x8_t v3_0 = vld1_u8(i0); in xnn_x8_transposec_ukernel__8x8_multi_mov_zip_neon() local
|