
Searched refs:address_increment (Results 1 – 22 of 22) sorted by relevance

/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/u8maxpool/
16x9p8q-neon.c
105 const size_t address_increment = k - 16; in pytorch_u8maxpool_ukernel_16x9p8q__neon() local
106 i0 = (const uint8_t*)((uintptr_t)i0 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__neon()
107 i1 = (const uint8_t*)((uintptr_t)i1 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__neon()
108 i2 = (const uint8_t*)((uintptr_t)i2 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__neon()
109 i3 = (const uint8_t*)((uintptr_t)i3 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__neon()
110 i4 = (const uint8_t*)((uintptr_t)i4 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__neon()
111 i5 = (const uint8_t*)((uintptr_t)i5 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__neon()
112 i6 = (const uint8_t*)((uintptr_t)i6 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__neon()
113 i7 = (const uint8_t*)((uintptr_t)i7 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__neon()
114 i8 = (const uint8_t*)((uintptr_t)i8 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__neon()
[all …]
16x9p8q-sse2.c
108 const size_t address_increment = k - 16; in pytorch_u8maxpool_ukernel_16x9p8q__sse2() local
109 i0 = (const uint8_t*)((uintptr_t)i0 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__sse2()
110 i1 = (const uint8_t*)((uintptr_t)i1 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__sse2()
111 i2 = (const uint8_t*)((uintptr_t)i2 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__sse2()
112 i3 = (const uint8_t*)((uintptr_t)i3 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__sse2()
113 i4 = (const uint8_t*)((uintptr_t)i4 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__sse2()
114 i5 = (const uint8_t*)((uintptr_t)i5 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__sse2()
115 i6 = (const uint8_t*)((uintptr_t)i6 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__sse2()
116 i7 = (const uint8_t*)((uintptr_t)i7 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__sse2()
117 i8 = (const uint8_t*)((uintptr_t)i8 + address_increment); in pytorch_u8maxpool_ukernel_16x9p8q__sse2()
[all …]
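
Both 16x9p8q max-pool variants above handle a partial tail (0 < k < 16 after the full-width passes) identically: k - 16 wraps as an unsigned size_t, so adding it through uintptr_t rewinds all nine row pointers by 16 - k bytes, and the final full-width load ends exactly at the last valid byte. A minimal scalar sketch of the trick, with illustrative names rather than the QNNPACK API, and assuming the output pointer is rewound the same way so the overlapping lanes are harmlessly rewritten:

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar sketch of the overlapping-tail rewind in the 16x9p8q kernels.
     * Assumes 0 < k < 16 and that at least one full 16-byte block was
     * already processed, so the rewind stays inside the buffers. */
    static void maxpool_tail(const uint8_t* i[9], uint8_t* o, size_t k) {
      const size_t address_increment = k - 16; /* wraps: effectively -(16 - k) */
      for (size_t r = 0; r < 9; r++) {
        i[r] = (const uint8_t*)((uintptr_t)i[r] + address_increment);
      }
      o = (uint8_t*)((uintptr_t)o + address_increment); /* assumed rewind */
      for (size_t j = 0; j < 16; j++) { /* stands in for one 16-lane vector op */
        uint8_t m = i[0][j];
        for (size_t r = 1; r < 9; r++) {
          if (i[r][j] > m) m = i[r][j];
        }
        o[j] = m; /* lanes j < 16 - k rewrite already-computed outputs */
      }
    }
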
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8avgpool/
mp8x9p8q-neon.c
103 const size_t address_increment = k - 8; in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
104 i0 = (const uint8_t*)((uintptr_t)i0 + address_increment); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
105 i1 = (const uint8_t*)((uintptr_t)i1 + address_increment); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
106 i2 = (const uint8_t*)((uintptr_t)i2 + address_increment); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
107 i3 = (const uint8_t*)((uintptr_t)i3 + address_increment); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
108 i4 = (const uint8_t*)((uintptr_t)i4 + address_increment); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
109 i5 = (const uint8_t*)((uintptr_t)i5 + address_increment); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
110 i6 = (const uint8_t*)((uintptr_t)i6 + address_increment); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
111 i7 = (const uint8_t*)((uintptr_t)i7 + address_increment); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
112 i8 = (const uint8_t*)((uintptr_t)i8 + address_increment); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
[all …]
up8x9-neon.c
151 const size_t address_increment = k - 8; in pytorch_q8avgpool_ukernel_up8x9__neon() local
152 i0 = (const uint8_t*)((uintptr_t)i0 + address_increment); in pytorch_q8avgpool_ukernel_up8x9__neon()
153 i1 = (const uint8_t*)((uintptr_t)i1 + address_increment); in pytorch_q8avgpool_ukernel_up8x9__neon()
154 i2 = (const uint8_t*)((uintptr_t)i2 + address_increment); in pytorch_q8avgpool_ukernel_up8x9__neon()
155 i3 = (const uint8_t*)((uintptr_t)i3 + address_increment); in pytorch_q8avgpool_ukernel_up8x9__neon()
156 i4 = (const uint8_t*)((uintptr_t)i4 + address_increment); in pytorch_q8avgpool_ukernel_up8x9__neon()
157 i5 = (const uint8_t*)((uintptr_t)i5 + address_increment); in pytorch_q8avgpool_ukernel_up8x9__neon()
158 i6 = (const uint8_t*)((uintptr_t)i6 + address_increment); in pytorch_q8avgpool_ukernel_up8x9__neon()
159 i7 = (const uint8_t*)((uintptr_t)i7 + address_increment); in pytorch_q8avgpool_ukernel_up8x9__neon()
160 i8 = (const uint8_t*)((uintptr_t)i8 + address_increment); in pytorch_q8avgpool_ukernel_up8x9__neon()
[all …]
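
The 8-wide average-pool kernels use the same rewind with k - 8. The detour through uintptr_t is the point: adding the wrapped size_t directly to a pointer would be an out-of-bounds pointer adjustment, whereas unsigned wraparound on uintptr_t is well defined and, on the flat address spaces these kernels target, converting back yields a pointer 8 - k bytes earlier. A self-contained check of that equivalence, names illustrative:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      uint8_t buf[16];
      const uint8_t* p = buf + 12;            /* pretend k = 4 tail bytes remain */
      const size_t k = 4;
      const size_t address_increment = k - 8; /* wraps to SIZE_MAX - 3 */
      const uint8_t* q = (const uint8_t*)((uintptr_t)p + address_increment);
      assert(q == p - (8 - k));               /* rewound 4 bytes, to buf + 8 */
      printf("rewound %td bytes\n", p - q);
      return 0;
    }
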
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/x8zip/
x4-neon.c
36 const size_t address_increment = n - 8; in pytorch_qnnp_x8zip_x4__neon() local
38 vxyzw.val[0] = vld1_u8(x + address_increment); in pytorch_qnnp_x8zip_x4__neon()
39 vxyzw.val[1] = vld1_u8(y + address_increment); in pytorch_qnnp_x8zip_x4__neon()
40 vxyzw.val[2] = vld1_u8(z + address_increment); in pytorch_qnnp_x8zip_x4__neon()
41 vxyzw.val[3] = vld1_u8(w + address_increment); in pytorch_qnnp_x8zip_x4__neon()
42 vst4_u8((uint8_t*)((uintptr_t)o + address_increment * 4), vxyzw); in pytorch_qnnp_x8zip_x4__neon()
x3-neon.c
33 const size_t address_increment = n - 8; in pytorch_qnnp_x8zip_x3__neon() local
35 vxyz.val[0] = vld1_u8(x + address_increment); in pytorch_qnnp_x8zip_x3__neon()
36 vxyz.val[1] = vld1_u8(y + address_increment); in pytorch_qnnp_x8zip_x3__neon()
37 vxyz.val[2] = vld1_u8(z + address_increment); in pytorch_qnnp_x8zip_x3__neon()
38 vst3_u8((uint8_t*)((uintptr_t)o + address_increment * 3), vxyz); in pytorch_qnnp_x8zip_x3__neon()
x4-sse2.c
46 const size_t address_increment = n - 16; in pytorch_qnnp_x8zip_x4__sse2() local
48 _mm_loadu_si128((const __m128i*)((uintptr_t)x + address_increment)); in pytorch_qnnp_x8zip_x4__sse2()
50 _mm_loadu_si128((const __m128i*)((uintptr_t)y + address_increment)); in pytorch_qnnp_x8zip_x4__sse2()
52 _mm_loadu_si128((const __m128i*)((uintptr_t)z + address_increment)); in pytorch_qnnp_x8zip_x4__sse2()
54 _mm_loadu_si128((const __m128i*)((uintptr_t)w + address_increment)); in pytorch_qnnp_x8zip_x4__sse2()
63 o = (void*)((uintptr_t)o + address_increment * 4); in pytorch_qnnp_x8zip_x4__sse2()
x2-neon.c
31 const size_t address_increment = n - 8; in pytorch_qnnp_x8zip_x2__neon() local
33 vxy.val[0] = vld1_u8((const uint8_t*)((uintptr_t)x + address_increment)); in pytorch_qnnp_x8zip_x2__neon()
34 vxy.val[1] = vld1_u8((const uint8_t*)((uintptr_t)y + address_increment)); in pytorch_qnnp_x8zip_x2__neon()
35 vst2_u8((uint8_t*)((uintptr_t)o + address_increment * 2), vxy); in pytorch_qnnp_x8zip_x2__neon()
x2-sse2.c
32 const size_t address_increment = n - 16; in pytorch_qnnp_x8zip_x2__sse2() local
34 _mm_loadu_si128((const __m128i*)((uintptr_t)x + address_increment)); in pytorch_qnnp_x8zip_x2__sse2()
36 _mm_loadu_si128((const __m128i*)((uintptr_t)y + address_increment)); in pytorch_qnnp_x8zip_x2__sse2()
39 o = (void*)((uintptr_t)o + address_increment * 2); in pytorch_qnnp_x8zip_x2__sse2()
xm-neon.c
102 const size_t address_increment = k - 8; in pytorch_qnnp_x8zip_xm__neon() local
103 x = (const uint8_t*)((uintptr_t)x + address_increment); in pytorch_qnnp_x8zip_xm__neon()
104 y = (const uint8_t*)((uintptr_t)y + address_increment); in pytorch_qnnp_x8zip_xm__neon()
105 z = (const uint8_t*)((uintptr_t)z + address_increment); in pytorch_qnnp_x8zip_xm__neon()
106 w = (const uint8_t*)((uintptr_t)w + address_increment); in pytorch_qnnp_x8zip_xm__neon()
107 const int64x1_t vshift = vmov_n_s64(8 * address_increment); in pytorch_qnnp_x8zip_xm__neon()
x3-sse2.c
109 const size_t address_increment = n - 16; in pytorch_qnnp_x8zip_x3__sse2() local
113 _mm_loadu_si128((const __m128i*)((uintptr_t)x + address_increment)); in pytorch_qnnp_x8zip_x3__sse2()
117 _mm_loadu_si128((const __m128i*)((uintptr_t)y + address_increment)); in pytorch_qnnp_x8zip_x3__sse2()
121 _mm_loadu_si128((const __m128i*)((uintptr_t)z + address_increment)); in pytorch_qnnp_x8zip_x3__sse2()
189 o = (uint8_t*)((uintptr_t)o + address_increment * 3); in pytorch_qnnp_x8zip_x3__sse2()
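
The zip kernels rewind both sides of the interleave: inputs step back by 8 - n (NEON) or 16 - n (SSE2) bytes, and the output steps back by that amount times the stream count, so the final full-width interleaved store overwrites bytes the previous iteration already produced with identical values. A scalar sketch of the two-stream case, names illustrative rather than the QNNPACK API:

    #include <stddef.h>
    #include <stdint.h>

    /* Zip the last n bytes of x and y (0 < n < 8) by re-reading a full
     * 8-byte block; assumes at least 8 bytes were already zipped so the
     * rewind stays in bounds. The inner loop stands in for vst2_u8. */
    static void zip_x2_tail(const uint8_t* x, const uint8_t* y,
                            uint8_t* o, size_t n) {
      const size_t address_increment = n - 8;               /* wraps negative */
      x = (const uint8_t*)((uintptr_t)x + address_increment);
      y = (const uint8_t*)((uintptr_t)y + address_increment);
      o = (uint8_t*)((uintptr_t)o + address_increment * 2); /* 2 streams */
      for (size_t j = 0; j < 8; j++) {
        o[2 * j + 0] = x[j];
        o[2 * j + 1] = y[j];
      }
    }
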
/external/XNNPACK/src/x8-zip/
x4-neon.c
36 const size_t address_increment = n - 8; in xnn_x8_zip_x4_ukernel__neon() local
38 vxyzw.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment)); in xnn_x8_zip_x4_ukernel__neon()
39 vxyzw.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment)); in xnn_x8_zip_x4_ukernel__neon()
40 vxyzw.val[2] = vld1_u8((const uint8_t*) ((uintptr_t) z + address_increment)); in xnn_x8_zip_x4_ukernel__neon()
41 vxyzw.val[3] = vld1_u8((const uint8_t*) ((uintptr_t) w + address_increment)); in xnn_x8_zip_x4_ukernel__neon()
42 vst4_u8((uint8_t*) ((uintptr_t) o + address_increment * 4), vxyzw); in xnn_x8_zip_x4_ukernel__neon()
x3-neon.c
34 const size_t address_increment = n - 8; in xnn_x8_zip_x3_ukernel__neon() local
36 vxyz.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment)); in xnn_x8_zip_x3_ukernel__neon()
37 vxyz.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment)); in xnn_x8_zip_x3_ukernel__neon()
38 vxyz.val[2] = vld1_u8((const uint8_t*) ((uintptr_t) z + address_increment)); in xnn_x8_zip_x3_ukernel__neon()
39 vst3_u8((uint8_t*) ((uintptr_t) o + address_increment * 3), vxyz); in xnn_x8_zip_x3_ukernel__neon()
x4-sse2.c
51 const size_t address_increment = n - 16; in xnn_x8_zip_x4_ukernel__sse2() local
52 const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment)); in xnn_x8_zip_x4_ukernel__sse2()
53 const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment)); in xnn_x8_zip_x4_ukernel__sse2()
54 const __m128i vz = _mm_loadu_si128((const __m128i*) ((uintptr_t) z + address_increment)); in xnn_x8_zip_x4_ukernel__sse2()
55 const __m128i vw = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + address_increment)); in xnn_x8_zip_x4_ukernel__sse2()
64 o = (void*) ((uintptr_t) o + address_increment * 4); in xnn_x8_zip_x4_ukernel__sse2()
x2-neon.c
32 const size_t address_increment = n - 8; in xnn_x8_zip_x2_ukernel__neon() local
34 vxy.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment)); in xnn_x8_zip_x2_ukernel__neon()
35 vxy.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment)); in xnn_x8_zip_x2_ukernel__neon()
36 vst2_u8((uint8_t*) ((uintptr_t) o + address_increment * 2), vxy); in xnn_x8_zip_x2_ukernel__neon()
x2-sse2.c
37 const size_t address_increment = n - 16; in xnn_x8_zip_x2_ukernel__sse2() local
38 const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment)); in xnn_x8_zip_x2_ukernel__sse2()
39 const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment)); in xnn_x8_zip_x2_ukernel__sse2()
42 o = (void*) ((uintptr_t) o + address_increment * 2); in xnn_x8_zip_x2_ukernel__sse2()
xm-neon.c
74 const size_t address_increment = k - 8; in xnn_x8_zip_xm_ukernel__neon() local
75 x = (const uint8_t*) ((uintptr_t) x + address_increment); in xnn_x8_zip_xm_ukernel__neon()
76 y = (const uint8_t*) ((uintptr_t) y + address_increment); in xnn_x8_zip_xm_ukernel__neon()
77 z = (const uint8_t*) ((uintptr_t) z + address_increment); in xnn_x8_zip_xm_ukernel__neon()
78 w = (const uint8_t*) ((uintptr_t) w + address_increment); in xnn_x8_zip_xm_ukernel__neon()
79 const int64x1_t vshift = vmov_n_s64(8 * address_increment); in xnn_x8_zip_xm_ukernel__neon()
x3-sse2.c
79 const size_t address_increment = n - 16; in xnn_x8_zip_x3_ukernel__sse2() local
81 const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment)); in xnn_x8_zip_x3_ukernel__sse2()
83 const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment)); in xnn_x8_zip_x3_ukernel__sse2()
85 const __m128i vz = _mm_loadu_si128((const __m128i*) ((uintptr_t) z + address_increment)); in xnn_x8_zip_x3_ukernel__sse2()
121 o = (uint8_t*) ((uintptr_t) o + address_increment * 3); in xnn_x8_zip_x3_ukernel__sse2()
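
The variable-stream xm kernels add one more step: vmov_n_s64(8 * address_increment) builds a shift count that is negative once the wrapped product is reinterpreted as signed (-8 * (8 - k)), and NEON's vshl with a negative count shifts right. A plausible scalar reading, under the assumption that this discards the 8 - k re-read low-order bytes of each little-endian tail load:

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar reading of the vshift trick: keep only the k new bytes of an
     * 8-byte tail load (little-endian), dropping the 8 - k overlap bytes.
     * Assumes 0 < k < 8, so the shift count stays below 64. */
    static uint64_t drop_overlap(uint64_t tail_bytes, size_t k) {
      const unsigned shift = 8u * (unsigned)(8 - k); /* bits to discard */
      return tail_bytes >> shift;
    }
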
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gavgpool/
up8x7-neon.c
130 const size_t address_increment = n - 8; in pytorch_q8gavgpool_ukernel_up8x7__neon() local
131 i0 = (const uint8_t*)((uintptr_t)i0 + address_increment); in pytorch_q8gavgpool_ukernel_up8x7__neon()
132 i1 = (const uint8_t*)((uintptr_t)i1 + address_increment); in pytorch_q8gavgpool_ukernel_up8x7__neon()
133 i2 = (const uint8_t*)((uintptr_t)i2 + address_increment); in pytorch_q8gavgpool_ukernel_up8x7__neon()
134 i3 = (const uint8_t*)((uintptr_t)i3 + address_increment); in pytorch_q8gavgpool_ukernel_up8x7__neon()
135 i4 = (const uint8_t*)((uintptr_t)i4 + address_increment); in pytorch_q8gavgpool_ukernel_up8x7__neon()
136 i5 = (const uint8_t*)((uintptr_t)i5 + address_increment); in pytorch_q8gavgpool_ukernel_up8x7__neon()
137 i6 = (const uint8_t*)((uintptr_t)i6 + address_increment); in pytorch_q8gavgpool_ukernel_up8x7__neon()
138 const int64x1_t vshift = vmov_n_s64(8 * address_increment); in pytorch_q8gavgpool_ukernel_up8x7__neon()
mp8x7p7q-neon.c
228 const size_t address_increment = n - 8; in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon() local
229 i0 = (const uint8_t*)((uintptr_t)i0 + address_increment); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon()
230 i1 = (const uint8_t*)((uintptr_t)i1 + address_increment); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon()
231 i2 = (const uint8_t*)((uintptr_t)i2 + address_increment); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon()
232 i3 = (const uint8_t*)((uintptr_t)i3 + address_increment); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon()
233 i4 = (const uint8_t*)((uintptr_t)i4 + address_increment); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon()
234 i5 = (const uint8_t*)((uintptr_t)i5 + address_increment); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon()
235 i6 = (const uint8_t*)((uintptr_t)i6 + address_increment); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon()
236 const int64x1_t vshift = vmov_n_s64(8 * address_increment); in pytorch_q8gavgpool_ukernel_mp8x7p7q__neon()
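
The global-average-pool tails rewind seven row pointers at once over the channel remainder n and build the same vshift, so one final 8-wide pass covers the last n channels. A scalar sketch that instead skips the overlap lanes explicitly; the names and the n-entry acc buffer are illustrative, not the QNNPACK API:

    #include <stddef.h>
    #include <stdint.h>

    /* Accumulate the last n channels (0 < n < 8) across 7 rows after the
     * rewind; lanes below 8 - n are re-reads and are skipped here, where
     * the real kernel discards them with the vshift trick. */
    static void gavgpool_tail(const uint8_t* i[7], int32_t* acc, size_t n) {
      const size_t address_increment = n - 8; /* wraps negative */
      for (size_t r = 0; r < 7; r++) {
        i[r] = (const uint8_t*)((uintptr_t)i[r] + address_increment);
      }
      for (size_t c = 8 - n; c < 8; c++) {
        int32_t s = 0;
        for (size_t r = 0; r < 7; r++) s += i[r][c];
        acc[c - (8 - n)] += s;
      }
    }
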
/external/pytorch/aten/src/ATen/native/cuda/
Normalization.cuh
975 int address_increment = inner_loop_stride * stride; in batch_norm_collect_statistics_channels_last_kernel() local
996 address_base += address_increment; in batch_norm_collect_statistics_channels_last_kernel()
1118 int address_increment = inner_loop_stride * stride; in batch_norm_transform_input_channels_last_kernel() local
1131 address_base += address_increment; in batch_norm_transform_input_channels_last_kernel()
1205 int address_increment = inner_loop_stride * stride; in batch_norm_backward_reduce_channels_last_kernel() local
1225 address_base += address_increment; in batch_norm_backward_reduce_channels_last_kernel()
1356 int address_increment = inner_loop_stride * stride; in batch_norm_backward_elemt_channels_last_kernel_impl() local
1368 address_base += address_increment; in batch_norm_backward_elemt_channels_last_kernel_impl()
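
In the CUDA batch-norm kernels, by contrast, address_increment is a forward stride: inner_loop_stride * stride advances a flattened channels-last index so each thread walks its own column of the N*H*W dimension. A minimal sketch of that traversal; every name here is assumed, not taken from the actual kernel:

    /* Strided channels-last walk: address_base selects a (row, channel)
     * start and each step jumps inner_loop_stride rows at once. */
    static float sum_column(const float* input, int address_base,
                            int address_increment, int loop_count) {
      float sum = 0.0f;
      for (int i = 0; i < loop_count; i++) {
        sum += input[address_base];
        address_base += address_increment; /* inner_loop_stride * stride */
      }
      return sum;
    }
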
/external/XNNPACK/src/amalgam/
sse2.c
13960 const size_t address_increment = n - 16; in xnn_x8_zip_x2_ukernel__sse2() local
13961 const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment)); in xnn_x8_zip_x2_ukernel__sse2()
13962 const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment)); in xnn_x8_zip_x2_ukernel__sse2()
13965 o = (void*) ((uintptr_t) o + address_increment * 2); in xnn_x8_zip_x2_ukernel__sse2()
14045 const size_t address_increment = n - 16; in xnn_x8_zip_x3_ukernel__sse2() local
14047 const __m128i vx = _mm_loadu_si128((const __m128i*) ((uintptr_t) x + address_increment)); in xnn_x8_zip_x3_ukernel__sse2()
14049 const __m128i vy = _mm_loadu_si128((const __m128i*) ((uintptr_t) y + address_increment)); in xnn_x8_zip_x3_ukernel__sse2()
14051 const __m128i vz = _mm_loadu_si128((const __m128i*) ((uintptr_t) z + address_increment)); in xnn_x8_zip_x3_ukernel__sse2()
14087 o = (uint8_t*) ((uintptr_t) o + address_increment * 3); in xnn_x8_zip_x3_ukernel__sse2()
14142 const size_t address_increment = n - 16; in xnn_x8_zip_x4_ukernel__sse2() local
[all …]