• Home
  • Raw
  • Download

Lines Matching full:cpu

31 v3d_load_utile(void *cpu, uint32_t cpu_stride,  in v3d_load_utile()  argument
41 /* Store each 8-byte line to cpu-side destination, in v3d_load_utile()
44 "vst1.8 d0, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
45 "vst1.8 d1, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
46 "vst1.8 d2, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
47 "vst1.8 d3, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
48 "vst1.8 d4, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
49 "vst1.8 d5, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
50 "vst1.8 d6, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
51 "vst1.8 d7, [%[cpu]]\n" in v3d_load_utile()
52 : [cpu] "+r"(cpu) in v3d_load_utile()
58 void *cpu2 = cpu + 8; in v3d_load_utile()
64 /* Store each 16-byte line in 2 parts to the cpu-side in v3d_load_utile()
68 "vst1.8 d0, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
70 "vst1.8 d2, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
72 "vst1.8 d4, [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
74 "vst1.8 d6, [%[cpu]]\n" in v3d_load_utile()
76 : [cpu] "+r"(cpu), in v3d_load_utile()
90 /* Store each 8-byte line to cpu-side destination, in v3d_load_utile()
93 "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
94 "st1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
95 "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
96 "st1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
97 "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
98 "st1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
99 "st1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
100 "st1 {v3.D}[1], [%[cpu]]\n" in v3d_load_utile()
101 : [cpu] "+r"(cpu) in v3d_load_utile()
107 void *cpu2 = cpu + 8; in v3d_load_utile()
113 /* Store each 16-byte line in 2 parts to the cpu-side in v3d_load_utile()
117 "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
119 "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
121 "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_load_utile()
123 "st1 {v3.D}[0], [%[cpu]]\n" in v3d_load_utile()
125 : [cpu] "+r"(cpu), in v3d_load_utile()
135 memcpy(cpu, gpu + gpu_offset, gpu_stride); in v3d_load_utile()
136 cpu += cpu_stride; in v3d_load_utile()
142 void *cpu, uint32_t cpu_stride) in v3d_store_utile() argument
147 /* Load each 8-byte line from cpu-side source, in v3d_store_utile()
150 "vld1.8 d0, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
151 "vld1.8 d1, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
152 "vld1.8 d2, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
153 "vld1.8 d3, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
154 "vld1.8 d4, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
155 "vld1.8 d5, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
156 "vld1.8 d6, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
157 "vld1.8 d7, [%[cpu]]\n" in v3d_store_utile()
162 : [cpu] "+r"(cpu) in v3d_store_utile()
168 void *cpu2 = cpu + 8; in v3d_store_utile()
170 /* Load each 16-byte line in 2 parts from the cpu-side in v3d_store_utile()
174 "vld1.8 d0, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
176 "vld1.8 d2, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
178 "vld1.8 d4, [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
180 "vld1.8 d6, [%[cpu]]\n" in v3d_store_utile()
184 : [cpu] "+r"(cpu), in v3d_store_utile()
194 /* Load each 8-byte line from cpu-side source, in v3d_store_utile()
197 "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
198 "ld1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
199 "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
200 "ld1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
201 "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
202 "ld1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
203 "ld1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
204 "ld1 {v3.D}[1], [%[cpu]]\n" in v3d_store_utile()
207 : [cpu] "+r"(cpu) in v3d_store_utile()
213 void *cpu2 = cpu + 8; in v3d_store_utile()
215 /* Load each 16-byte line in 2 parts from the cpu-side in v3d_store_utile()
219 "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
221 "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
223 "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" in v3d_store_utile()
225 "ld1 {v3.D}[0], [%[cpu]]\n" in v3d_store_utile()
229 : [cpu] "+r"(cpu), in v3d_store_utile()
239 memcpy(gpu + gpu_offset, cpu, gpu_stride); in v3d_store_utile()
240 cpu += cpu_stride; in v3d_store_utile()