/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/u8maxpool/ |
D | 16x9p8q-neon.c | in pytorch_u8maxpool_ukernel_16x9p8q__neon():
    105  const size_t address_increment = k - 16;  local
    106  i0 = (const uint8_t*)((uintptr_t)i0 + address_increment);
    107  i1 = (const uint8_t*)((uintptr_t)i1 + address_increment);
    108  i2 = (const uint8_t*)((uintptr_t)i2 + address_increment);
    109  i3 = (const uint8_t*)((uintptr_t)i3 + address_increment);
    110  i4 = (const uint8_t*)((uintptr_t)i4 + address_increment);
    111  i5 = (const uint8_t*)((uintptr_t)i5 + address_increment);
    112  i6 = (const uint8_t*)((uintptr_t)i6 + address_increment);
    113  i7 = (const uint8_t*)((uintptr_t)i7 + address_increment);
    114  i8 = (const uint8_t*)((uintptr_t)i8 + address_increment);
    [all …]
|
D | 16x9p8q-sse2.c | in pytorch_u8maxpool_ukernel_16x9p8q__sse2():
    108  const size_t address_increment = k - 16;  local
    109  i0 = (const uint8_t*)((uintptr_t)i0 + address_increment);
    110  i1 = (const uint8_t*)((uintptr_t)i1 + address_increment);
    111  i2 = (const uint8_t*)((uintptr_t)i2 + address_increment);
    112  i3 = (const uint8_t*)((uintptr_t)i3 + address_increment);
    113  i4 = (const uint8_t*)((uintptr_t)i4 + address_increment);
    114  i5 = (const uint8_t*)((uintptr_t)i5 + address_increment);
    115  i6 = (const uint8_t*)((uintptr_t)i6 + address_increment);
    116  i7 = (const uint8_t*)((uintptr_t)i7 + address_increment);
    117  i8 = (const uint8_t*)((uintptr_t)i8 + address_increment);
    [all …]
|
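Both u8maxpool tails above use the same idiom: once fewer than 16 bytes remain, every pointer is rewound so that one final full-width vector op covers the tail, overlapping bytes the main loop already processed. Because max is a pure function of its inputs, the overlapped lanes are simply recomputed with identical results. Below is a minimal scalar C sketch of the idiom under stated assumptions: max16() is a hypothetical stand-in for the 16-lane vmaxq_u8 / _mm_max_epu8, only two input rows are shown (the real kernels pool nine), and k >= 16 is assumed on entry so the rewind stays inside the buffers, as the real kernels guarantee.

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical stand-in for one 16-lane vector max. */
    static void max16(uint8_t* o, const uint8_t* i0, const uint8_t* i1) {
      for (size_t j = 0; j < 16; j++) {
        o[j] = i0[j] > i1[j] ? i0[j] : i1[j];
      }
    }

    /* Tail handling via pointer rewind: when 0 < k < 16 bytes remain, add
     * the wrapped size_t value k - 16 through uintptr_t, which rewinds each
     * pointer by 16 - k, then redo one full 16-byte op. The overlapped
     * lanes are recomputed from the same inputs, so the rewrite is harmless. */
    void maxpool_row(uint8_t* o, const uint8_t* i0, const uint8_t* i1, size_t k) {
      while (k >= 16) {
        max16(o, i0, i1);
        i0 += 16; i1 += 16; o += 16;
        k -= 16;
      }
      if (k != 0) {
        const size_t address_increment = k - 16; /* wraps; acts as -(16 - k) */
        i0 = (const uint8_t*)((uintptr_t)i0 + address_increment);
        i1 = (const uint8_t*)((uintptr_t)i1 + address_increment);
        o = (uint8_t*)((uintptr_t)o + address_increment);
        max16(o, i0, i1);
      }
    }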
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8avgpool/ |
D | mp8x9p8q-neon.c | in pytorch_q8avgpool_ukernel_mp8x9p8q__neon():
    103  const size_t address_increment = k - 8;  local
    104  i0 = (const uint8_t*)((uintptr_t)i0 + address_increment);
    105  i1 = (const uint8_t*)((uintptr_t)i1 + address_increment);
    106  i2 = (const uint8_t*)((uintptr_t)i2 + address_increment);
    107  i3 = (const uint8_t*)((uintptr_t)i3 + address_increment);
    108  i4 = (const uint8_t*)((uintptr_t)i4 + address_increment);
    109  i5 = (const uint8_t*)((uintptr_t)i5 + address_increment);
    110  i6 = (const uint8_t*)((uintptr_t)i6 + address_increment);
    111  i7 = (const uint8_t*)((uintptr_t)i7 + address_increment);
    112  i8 = (const uint8_t*)((uintptr_t)i8 + address_increment);
    [all …]
|
D | up8x9-neon.c | in pytorch_q8avgpool_ukernel_up8x9__neon():
    151  const size_t address_increment = k - 8;  local
    152  i0 = (const uint8_t*)((uintptr_t)i0 + address_increment);
    153  i1 = (const uint8_t*)((uintptr_t)i1 + address_increment);
    154  i2 = (const uint8_t*)((uintptr_t)i2 + address_increment);
    155  i3 = (const uint8_t*)((uintptr_t)i3 + address_increment);
    156  i4 = (const uint8_t*)((uintptr_t)i4 + address_increment);
    157  i5 = (const uint8_t*)((uintptr_t)i5 + address_increment);
    158  i6 = (const uint8_t*)((uintptr_t)i6 + address_increment);
    159  i7 = (const uint8_t*)((uintptr_t)i7 + address_increment);
    160  i8 = (const uint8_t*)((uintptr_t)i8 + address_increment);
    [all …]
|
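The q8avgpool tails rewind all nine pooling-window rows (i0..i8, a 3x3 window) by the same wrapped k - 8, only at 8-lane width. Overlap-and-rewrite is still safe here because each recomputed output lane is a pure function of the same rebased inputs. A hedged sketch of the rebased nine-row accumulation; widening to uint16_t mirrors the kernels' vaddl_u8-style accumulation, but the rounding divide is a simplification of the real scale-and-zero-point requantization:

    #include <stddef.h>
    #include <stdint.h>

    /* Recompute the last 8 outputs of a 9-row average pool after rewinding
     * every row pointer (and the output) by 8 - k. Assumes 0 < k < 8 and
     * that at least 8 elements were processed before the tail. */
    void avgpool_tail(uint8_t* o, const uint8_t* i[9], size_t k) {
      const size_t address_increment = k - 8; /* wraps; rewinds by 8 - k */
      for (size_t r = 0; r < 9; r++) {
        i[r] = (const uint8_t*)((uintptr_t)i[r] + address_increment);
      }
      o = (uint8_t*)((uintptr_t)o + address_increment);
      for (size_t j = 0; j < 8; j++) {
        uint16_t acc = 0;
        for (size_t r = 0; r < 9; r++) {
          acc = (uint16_t)(acc + i[r][j]);   /* widening add, max 9 * 255 */
        }
        o[j] = (uint8_t)((acc + 4) / 9);     /* rounded mean; stand-in for
                                                the real requantization */
      }
    }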
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/x8zip/ |
D | x4-neon.c | in pytorch_qnnp_x8zip_x4__neon():
    36  const size_t address_increment = n - 8;  local
    38  vxyzw.val[0] = vld1_u8(x + address_increment);
    39  vxyzw.val[1] = vld1_u8(y + address_increment);
    40  vxyzw.val[2] = vld1_u8(z + address_increment);
    41  vxyzw.val[3] = vld1_u8(w + address_increment);
    42  vst4_u8((uint8_t*)((uintptr_t)o + address_increment * 4), vxyzw);
|
D | x3-neon.c | in pytorch_qnnp_x8zip_x3__neon():
    33  const size_t address_increment = n - 8;  local
    35  vxyz.val[0] = vld1_u8(x + address_increment);
    36  vxyz.val[1] = vld1_u8(y + address_increment);
    37  vxyz.val[2] = vld1_u8(z + address_increment);
    38  vst3_u8((uint8_t*)((uintptr_t)o + address_increment * 3), vxyz);
|
D | x4-sse2.c | in pytorch_qnnp_x8zip_x4__sse2():
    46  const size_t address_increment = n - 16;  local
    48  _mm_loadu_si128((const __m128i*)((uintptr_t)x + address_increment));
    50  _mm_loadu_si128((const __m128i*)((uintptr_t)y + address_increment));
    52  _mm_loadu_si128((const __m128i*)((uintptr_t)z + address_increment));
    54  _mm_loadu_si128((const __m128i*)((uintptr_t)w + address_increment));
    63  o = (void*)((uintptr_t)o + address_increment * 4);
|
D | x2-neon.c | in pytorch_qnnp_x8zip_x2__neon():
    31  const size_t address_increment = n - 8;  local
    33  vxy.val[0] = vld1_u8((const uint8_t*)((uintptr_t)x + address_increment));
    34  vxy.val[1] = vld1_u8((const uint8_t*)((uintptr_t)y + address_increment));
    35  vst2_u8((uint8_t*)((uintptr_t)o + address_increment * 2), vxy);
|
D | x2-sse2.c | in pytorch_qnnp_x8zip_x2__sse2():
    32  const size_t address_increment = n - 16;  local
    34  _mm_loadu_si128((const __m128i*)((uintptr_t)x + address_increment));
    36  _mm_loadu_si128((const __m128i*)((uintptr_t)y + address_increment));
    39  o = (void*)((uintptr_t)o + address_increment * 2);
|
D | xm-neon.c | in pytorch_qnnp_x8zip_xm__neon():
    102  const size_t address_increment = k - 8;  local
    103  x = (const uint8_t*)((uintptr_t)x + address_increment);
    104  y = (const uint8_t*)((uintptr_t)y + address_increment);
    105  z = (const uint8_t*)((uintptr_t)z + address_increment);
    106  w = (const uint8_t*)((uintptr_t)w + address_increment);
    107  const int64x1_t vshift = vmov_n_s64(8 * address_increment);
|
D | x3-sse2.c | in pytorch_qnnp_x8zip_x3__sse2():
    109  const size_t address_increment = n - 16;  local
    113  _mm_loadu_si128((const __m128i*)((uintptr_t)x + address_increment));
    117  _mm_loadu_si128((const __m128i*)((uintptr_t)y + address_increment));
    121  _mm_loadu_si128((const __m128i*)((uintptr_t)z + address_increment));
    189  o = (uint8_t*)((uintptr_t)o + address_increment * 3);
|
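In the x8zip tails the inputs rewind by n - 8 (or n - 16 in the SSE2 variants) exactly as in the pool kernels, but each input byte produces one output byte per stream, so the interleaved output pointer must rewind by the same amount scaled by the stream count; that is why the stores above use address_increment * 2, * 3, and * 4. A scalar sketch of the two-stream case, with zip2_8() as a hypothetical stand-in for vst2_u8; it assumes n >= 8 on entry, as the real kernels guarantee before taking this path:

    #include <stddef.h>
    #include <stdint.h>

    /* Stand-in for an 8-lane interleaving store: writes x[j], y[j] pairs. */
    static void zip2_8(uint8_t* o, const uint8_t* x, const uint8_t* y) {
      for (size_t j = 0; j < 8; j++) {
        o[2 * j + 0] = x[j];
        o[2 * j + 1] = y[j];
      }
    }

    /* Zip two byte streams; the tail rewinds inputs by 8 - n and the output
     * by (8 - n) * 2, then redoes one full-width zip. Overlapped output
     * bytes are rewritten with identical data. */
    void zip_x2(uint8_t* o, const uint8_t* x, const uint8_t* y, size_t n) {
      while (n >= 8) {
        zip2_8(o, x, y);
        x += 8; y += 8; o += 16;
        n -= 8;
      }
      if (n != 0) {
        const size_t address_increment = n - 8; /* wraps; acts as -(8 - n) */
        x = (const uint8_t*)((uintptr_t)x + address_increment);
        y = (const uint8_t*)((uintptr_t)y + address_increment);
        o = (uint8_t*)((uintptr_t)o + address_increment * 2);
        zip2_8(o, x, y);
      }
    }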
/external/XNNPACK/src/x8-zip/ |
D | x4-neon.c | in xnn_x8_zip_x4_ukernel__neon():
    36  const size_t address_increment = n - 8;  local
    38  vxyzw.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment));
    39  vxyzw.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment));
    40  vxyzw.val[2] = vld1_u8((const uint8_t*) ((uintptr_t) z + address_increment));
    41  vxyzw.val[3] = vld1_u8((const uint8_t*) ((uintptr_t) w + address_increment));
    42  vst4_u8((uint8_t*) ((uintptr_t) o + address_increment * 4), vxyzw);
|
D | x3-neon.c | in xnn_x8_zip_x3_ukernel__neon():
    34  const size_t address_increment = n - 8;  local
    36  vxyz.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment));
    37  vxyz.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment));
    38  vxyz.val[2] = vld1_u8((const uint8_t*) ((uintptr_t) z + address_increment));
    39  vst3_u8((uint8_t*) ((uintptr_t) o + address_increment * 3), vxyz);
|
D | x4-sse2.c | in xnn_x8_zip_x4_ukernel__sse2():
    51  const size_t address_increment = n - 16;  local
    52  const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment));
    53  const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment));
    54  const __m128i vz = _mm_loadu_si128((const __m128i*) ((uintptr_t) z + address_increment));
    55  const __m128i vw = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + address_increment));
    64  o = (void*) ((uintptr_t) o + address_increment * 4);
|
D | x2-neon.c | in xnn_x8_zip_x2_ukernel__neon():
    32  const size_t address_increment = n - 8;  local
    34  vxy.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment));
    35  vxy.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment));
    36  vst2_u8((uint8_t*) ((uintptr_t) o + address_increment * 2), vxy);
|
D | x2-sse2.c | in xnn_x8_zip_x2_ukernel__sse2():
    37  const size_t address_increment = n - 16;  local
    38  const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment));
    39  const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment));
    42  o = (void*) ((uintptr_t) o + address_increment * 2);
|
D | xm-neon.c | in xnn_x8_zip_xm_ukernel__neon():
    74  const size_t address_increment = k - 8;  local
    75  x = (const uint8_t*) ((uintptr_t) x + address_increment);
    76  y = (const uint8_t*) ((uintptr_t) y + address_increment);
    77  z = (const uint8_t*) ((uintptr_t) z + address_increment);
    78  w = (const uint8_t*) ((uintptr_t) w + address_increment);
    79  const int64x1_t vshift = vmov_n_s64(8 * address_increment);
|
D | x3-sse2.c | in xnn_x8_zip_x3_ukernel__sse2():
    79   const size_t address_increment = n - 16;  local
    81   const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment));
    83   const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment));
    85   const __m128i vz = _mm_loadu_si128((const __m128i*) ((uintptr_t) z + address_increment));
    121  o = (uint8_t*) ((uintptr_t) o + address_increment * 3);
|
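The xm variants (both the qnnpack and XNNPACK copies above) zip an arbitrary number of streams, so their tails cannot rely on overlap-and-rewrite alone: after the rewind, the low 8 - k bytes of each reloaded 8-byte vector are bytes that were already emitted. That is what vshift is for: vshl_u64 with the negative per-lane count 8 * address_increment shifts each 64-bit lane right by 8 * (8 - k) bits, moving the k fresh bytes down to lane 0 before the partial stores that follow. A little-endian scalar model of that realignment, under the stated assumptions:

    #include <stdint.h>
    #include <string.h>

    /* p has been rewound so it points 8 - k bytes before the first
     * unprocessed byte; load a full 8 bytes and shift the stale low bytes
     * out. Assumes little-endian byte order and 0 < k < 8. */
    uint64_t realign_tail(const uint8_t* p, size_t k) {
      uint64_t v;
      memcpy(&v, p, sizeof v);   /* stand-in for vld1_u8 */
      return v >> (8 * (8 - k)); /* vshl_u64(v, vmov_n_s64(8 * (k - 8))) */
    }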
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gavgpool/ |
D | up8x7-neon.c | in pytorch_q8gavgpool_ukernel_up8x7__neon():
    130  const size_t address_increment = n - 8;  local
    131  i0 = (const uint8_t*)((uintptr_t)i0 + address_increment);
    132  i1 = (const uint8_t*)((uintptr_t)i1 + address_increment);
    133  i2 = (const uint8_t*)((uintptr_t)i2 + address_increment);
    134  i3 = (const uint8_t*)((uintptr_t)i3 + address_increment);
    135  i4 = (const uint8_t*)((uintptr_t)i4 + address_increment);
    136  i5 = (const uint8_t*)((uintptr_t)i5 + address_increment);
    137  i6 = (const uint8_t*)((uintptr_t)i6 + address_increment);
    138  const int64x1_t vshift = vmov_n_s64(8 * address_increment);
|
D | mp8x7p7q-neon.c | in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon():
    228  const size_t address_increment = n - 8;  local
    229  i0 = (const uint8_t*)((uintptr_t)i0 + address_increment);
    230  i1 = (const uint8_t*)((uintptr_t)i1 + address_increment);
    231  i2 = (const uint8_t*)((uintptr_t)i2 + address_increment);
    232  i3 = (const uint8_t*)((uintptr_t)i3 + address_increment);
    233  i4 = (const uint8_t*)((uintptr_t)i4 + address_increment);
    234  i5 = (const uint8_t*)((uintptr_t)i5 + address_increment);
    235  i6 = (const uint8_t*)((uintptr_t)i6 + address_increment);
    236  const int64x1_t vshift = vmov_n_s64(8 * address_increment);
|
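The q8gavgpool tails combine both ideas: seven row pointers rewound by n - 8 (here n counts remaining channels) plus the same vshift realignment. The arithmetic relies on well-defined unsigned wraparound: for n < 8, the size_t value n - 8 wraps to a huge number, but adding it to a pointer through uintptr_t is, modulo the address width, exactly a rewind of 8 - n bytes. A tiny standalone demonstration of the wrap:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      uint8_t buf[16] = {0};
      const uint8_t* i0 = buf + 8;  /* main loop consumed the first 8 bytes */
      size_t n = 5;                 /* 5 channels remain */
      const size_t address_increment = n - 8; /* wraps to SIZE_MAX - 2 */
      i0 = (const uint8_t*)((uintptr_t)i0 + address_increment);
      printf("offset after rewind: %td\n", i0 - buf); /* prints 5 */
      return 0;
    }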
/external/pytorch/aten/src/ATen/native/cuda/ |
D | Normalization.cuh |
    975   int address_increment = inner_loop_stride * stride;  in batch_norm_collect_statistics_channels_last_kernel() local
    996   address_base += address_increment;  in batch_norm_collect_statistics_channels_last_kernel()
    1118  int address_increment = inner_loop_stride * stride;  in batch_norm_transform_input_channels_last_kernel() local
    1131  address_base += address_increment;  in batch_norm_transform_input_channels_last_kernel()
    1205  int address_increment = inner_loop_stride * stride;  in batch_norm_backward_reduce_channels_last_kernel() local
    1225  address_base += address_increment;  in batch_norm_backward_reduce_channels_last_kernel()
    1356  int address_increment = inner_loop_stride * stride;  in batch_norm_backward_elemt_channels_last_kernel_impl() local
    1368  address_base += address_increment;  in batch_norm_backward_elemt_channels_last_kernel_impl()
|
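The CUDA batch-norm kernels use address_increment differently: it is not a tail rewind but a stride, inner_loop_stride * stride, that advances a thread's flat offset by inner_loop_stride rows of a channels-last (rows, C) tensor per iteration, so each thread walks one channel column. A plain C model of that access pattern; address_base and address_increment follow the excerpt, while total_rows, c_offset, m_offset, and the float element type are assumptions added for illustration:

    /* Walk one channel column of a row-major (total_rows, stride) matrix,
     * visiting rows m_offset, m_offset + inner_loop_stride, ... as a single
     * CUDA thread would in the channels-last kernels. */
    static float column_partial_sum(const float* input, int stride,
                                    int c_offset, int m_offset,
                                    int inner_loop_stride, int total_rows) {
      int address_base = m_offset * stride + c_offset;
      const int address_increment = inner_loop_stride * stride;
      float sum = 0.0f;
      for (int m = m_offset; m < total_rows; m += inner_loop_stride) {
        sum += input[address_base];        /* same column, next strided row */
        address_base += address_increment;
      }
      return sum;
    }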
/external/XNNPACK/src/amalgam/ |
D | sse2.c |
    13960  const size_t address_increment = n - 16;  in xnn_x8_zip_x2_ukernel__sse2() local
    13961  const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment));  in xnn_x8_zip_x2_ukernel__sse2()
    13962  const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment));  in xnn_x8_zip_x2_ukernel__sse2()
    13965  o = (void*) ((uintptr_t) o + address_increment * 2);  in xnn_x8_zip_x2_ukernel__sse2()
    14045  const size_t address_increment = n - 16;  in xnn_x8_zip_x3_ukernel__sse2() local
    14047  const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment));  in xnn_x8_zip_x3_ukernel__sse2()
    14049  const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment));  in xnn_x8_zip_x3_ukernel__sse2()
    14051  const __m128i vz = _mm_loadu_si128((const __m128i*) ((uintptr_t) z + address_increment));  in xnn_x8_zip_x3_ukernel__sse2()
    14087  o = (uint8_t*) ((uintptr_t) o + address_increment * 3);  in xnn_x8_zip_x3_ukernel__sse2()
    14142  const size_t address_increment = n - 16;  in xnn_x8_zip_x4_ukernel__sse2() local
    [all …]
|