/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Dominik Zeromski <dominik.zeromski@intel.com>
 */

#include <intel_bufmgr.h>
#include <i915_drm.h>

#include "intel_reg.h"
#include "drmtest.h"

#include "gpgpu_fill.h"
#include "gpu_cmds.h"

/* lib/i915/shaders/gpgpu/gpgpu_fill.gxa */
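/*
 * The tables below are that program pre-assembled for each EU generation,
 * one 128-bit native instruction per row. Roughly: load the fill color
 * from the CURBE, derive the surface offset from the thread group ID,
 * write the block out through the data port, then terminate the thread
 * with an EOT send.
 */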
static const uint32_t gen7_gpgpu_kernel[][4] = {
        { 0x00400001, 0x20200231, 0x00000020, 0x00000000 },
        { 0x00000041, 0x20400c21, 0x00000004, 0x00000010 },
        { 0x00000001, 0x20440021, 0x00000018, 0x00000000 },
        { 0x00600001, 0x20800021, 0x008d0000, 0x00000000 },
        { 0x00200001, 0x20800021, 0x00450040, 0x00000000 },
        { 0x00000001, 0x20880061, 0x00000000, 0x0000000f },
        { 0x00800001, 0x20a00021, 0x00000020, 0x00000000 },
        { 0x05800031, 0x24001ca8, 0x00000080, 0x060a8000 },
        { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 },
        { 0x07800031, 0x20001ca8, 0x00000e00, 0x82000010 },
};

static const uint32_t gen8_gpgpu_kernel[][4] = {
        { 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
        { 0x00000041, 0x20400208, 0x06000004, 0x00000010 },
        { 0x00000001, 0x20440208, 0x00000018, 0x00000000 },
        { 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
        { 0x00200001, 0x20800208, 0x00450040, 0x00000000 },
        { 0x00000001, 0x20880608, 0x00000000, 0x0000000f },
        { 0x00800001, 0x20a00208, 0x00000020, 0x00000000 },
        { 0x0c800031, 0x24000a40, 0x0e000080, 0x060a8000 },
        { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
        { 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
};

static const uint32_t gen9_gpgpu_kernel[][4] = {
        { 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
        { 0x00000041, 0x20400208, 0x06000004, 0x00000010 },
        { 0x00000001, 0x20440208, 0x00000018, 0x00000000 },
        { 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
        { 0x00200001, 0x20800208, 0x00450040, 0x00000000 },
        { 0x00000001, 0x20880608, 0x00000000, 0x0000000f },
        { 0x00800001, 0x20a00208, 0x00000020, 0x00000000 },
        { 0x0c800031, 0x24000a40, 0x06000080, 0x060a8000 },
        { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
        { 0x07800031, 0x20000a40, 0x06000e00, 0x82000010 },
};

static const uint32_t gen11_gpgpu_kernel[][4] = {
        { 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
        { 0x00000009, 0x20400208, 0x06000004, 0x00000004 },
        { 0x00000001, 0x20440208, 0x00000018, 0x00000000 },
        { 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
        { 0x00200001, 0x20800208, 0x00450040, 0x00000000 },
        { 0x00000001, 0x20880608, 0x00000000, 0x0000000f },
        { 0x00800001, 0x20a00208, 0x00000020, 0x00000000 },
        { 0x0c800031, 0x24000a40, 0x06000080, 0x040a8000 },
        { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
        { 0x07800031, 0x20000a40, 0x06000e00, 0x82000010 },
};
/*
 * This sets up the gpgpu pipeline:
 *
 * +---------------+ <---- 4096
 * |       ^       |
 * |       |       |
 * |    various    |
 * |     state     |
 * |       |       |
 * |_______|_______| <---- 2048 + ?
 * |       ^       |
 * |       |       |
 * |     batch     |
 * |   commands    |
 * |       |       |
 * |       |       |
 * +---------------+ <---- 0 + ?
 *
 */

#define BATCH_STATE_SPLIT 2048
/* VFE STATE params */
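/*
 * We launch a single thread per thread group; the URB entry and CURBE
 * allocation sizes are programmed in 256-bit units, so a CURBE size of 1
 * is enough for the one fill-color constant.
 */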
#define THREADS 1
#define GEN7_GPGPU_URB_ENTRIES 0
#define GEN8_GPGPU_URB_ENTRIES 1
#define GPGPU_URB_SIZE 0
#define GPGPU_CURBE_SIZE 1
#define GEN7_VFE_STATE_GPGPU_MODE 1

void
gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
                    const struct igt_buf *dst,
                    unsigned int x, unsigned int y,
                    unsigned int width, unsigned int height,
                    uint8_t color)
{
        uint32_t curbe_buffer, interface_descriptor;
        uint32_t batch_end;

        intel_batchbuffer_flush(batch);

        /* setup states */
        batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];

        /*
         * The constant buffer must be filled for every thread, but since
         * we launch only one thread per thread group, a single CURBE
         * entry is enough. Each thread then uses its thread group ID as
         * the buffer offset.
         */
        curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);

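        /*
         * The interface descriptor ties the kernel binary to the binding
         * table that points at the destination surface.
         */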
        interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
                        gen7_gpgpu_kernel, sizeof(gen7_gpgpu_kernel));

        igt_assert(batch->ptr < &batch->buffer[4095]);

        batch->ptr = batch->buffer;

        /* GPGPU pipeline */
        OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);

        gen7_emit_state_base_address(batch);
        gen7_emit_vfe_state(batch, THREADS, GEN7_GPGPU_URB_ENTRIES,
                            GPGPU_URB_SIZE, GPGPU_CURBE_SIZE,
                            GEN7_VFE_STATE_GPGPU_MODE);
        gen7_emit_curbe_load(batch, curbe_buffer);
        gen7_emit_interface_descriptor_load(batch, interface_descriptor);
        gen7_emit_gpgpu_walk(batch, x, y, width, height);

        OUT_BATCH(MI_BATCH_BUFFER_END);

        batch_end = intel_batchbuffer_align(batch, 8);
        igt_assert(batch_end < BATCH_STATE_SPLIT);

        gen7_render_flush(batch, batch_end);
        intel_batchbuffer_reset(batch);
}

void
gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
                    const struct igt_buf *dst,
                    unsigned int x, unsigned int y,
                    unsigned int width, unsigned int height,
                    uint8_t color)
{
        uint32_t curbe_buffer, interface_descriptor;
        uint32_t batch_end;

        intel_batchbuffer_flush(batch);

        /* setup states */
        batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];

        /*
         * The constant buffer must be filled for every thread, but since
         * we launch only one thread per thread group, a single CURBE
         * entry is enough. Each thread then uses its thread group ID as
         * the buffer offset.
         */
        curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);

        interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
                        gen8_gpgpu_kernel, sizeof(gen8_gpgpu_kernel));

        igt_assert(batch->ptr < &batch->buffer[4095]);

        batch->ptr = batch->buffer;

        /* GPGPU pipeline */
        OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);

        gen8_emit_state_base_address(batch);
        gen8_emit_vfe_state(batch, THREADS, GEN8_GPGPU_URB_ENTRIES,
                            GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
        gen7_emit_curbe_load(batch, curbe_buffer);
        gen7_emit_interface_descriptor_load(batch, interface_descriptor);
        gen8_emit_gpgpu_walk(batch, x, y, width, height);

        OUT_BATCH(MI_BATCH_BUFFER_END);

        batch_end = intel_batchbuffer_align(batch, 8);
        igt_assert(batch_end < BATCH_STATE_SPLIT);

        gen7_render_flush(batch, batch_end);
        intel_batchbuffer_reset(batch);
}

static void
__gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
                      const struct igt_buf *dst,
                      unsigned int x, unsigned int y,
                      unsigned int width, unsigned int height,
                      uint8_t color, const uint32_t kernel[][4],
                      size_t kernel_size)
{
        uint32_t curbe_buffer, interface_descriptor;
        uint32_t batch_end;

        intel_batchbuffer_flush(batch);

        /* setup states */
        batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];

        /*
         * The constant buffer must be filled for every thread, but since
         * we launch only one thread per thread group, a single CURBE
         * entry is enough. Each thread then uses its thread group ID as
         * the buffer offset.
         */
        curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);

        interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
                        kernel, kernel_size);

        igt_assert(batch->ptr < &batch->buffer[4095]);

        batch->ptr = batch->buffer;

        /* GPGPU pipeline */
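        /*
         * On Gen9+ PIPELINE_SELECT only honours the selection field when
         * the corresponding mask bits are also set.
         */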
        OUT_BATCH(GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
                  PIPELINE_SELECT_GPGPU);

        gen9_emit_state_base_address(batch);
        gen8_emit_vfe_state(batch, THREADS, GEN8_GPGPU_URB_ENTRIES,
                            GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
        gen7_emit_curbe_load(batch, curbe_buffer);
        gen7_emit_interface_descriptor_load(batch, interface_descriptor);
        gen8_emit_gpgpu_walk(batch, x, y, width, height);

        OUT_BATCH(MI_BATCH_BUFFER_END);

        batch_end = intel_batchbuffer_align(batch, 8);
        igt_assert(batch_end < BATCH_STATE_SPLIT);

        gen7_render_flush(batch, batch_end);
        intel_batchbuffer_reset(batch);
}
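
/*
 * The Gen9 and Gen11 entry points below share the emission path above and
 * differ only in which kernel binary they pass down.
 */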

void gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
                         const struct igt_buf *dst,
                         unsigned int x, unsigned int y,
                         unsigned int width, unsigned int height,
                         uint8_t color)
{
        __gen9_gpgpu_fillfunc(batch, dst, x, y, width, height, color,
                              gen9_gpgpu_kernel, sizeof(gen9_gpgpu_kernel));
}

void gen11_gpgpu_fillfunc(struct intel_batchbuffer *batch,
                          const struct igt_buf *dst,
                          unsigned int x, unsigned int y,
                          unsigned int width, unsigned int height,
                          uint8_t color)
{
        __gen9_gpgpu_fillfunc(batch, dst, x, y, width, height, color,
                              gen11_gpgpu_kernel, sizeof(gen11_gpgpu_kernel));
}
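
/*
 * Usage sketch (not part of this file): IGT tests typically look the fill
 * function up by device id and invoke it on an already-initialized batch
 * and destination buffer, e.g. to fill a 64x64 rectangle at the origin
 * with the byte 0x55:
 *
 *	igt_fillfunc_t fill = igt_get_gpgpu_fillfunc(devid);
 *	if (fill)
 *		fill(batch, &dst, 0, 0, 64, 64, 0x55);
 *
 * Here devid, batch and dst are assumed to come from the caller's setup
 * (the drm fd, intel_batchbuffer_alloc() and an igt_buf-backed bo).
 */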