1 /*
2 * Copyright © 2022 Collabora Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_runner.h"
6
7 #include <fcntl.h>
8 #include <string.h>
9 #include <xf86drm.h>
10
11 #include "mme_fermi_sim.h"
12 #include "mme_tu104_sim.h"
13
14 #include "nvk_clc597.h"
15
16 #include "nouveau_bo.h"
17 #include "nouveau_context.h"
18
19 /* nouveau_drm.h isn't C++-friendly */
20 #define class cls
21 #include "drm-uapi/nouveau_drm.h"
22 #undef class
23
mme_runner()24 mme_runner::mme_runner() :
25 devinfo(NULL), data_addr(0), data(NULL)
26 { }
27
~mme_runner()28 mme_runner::~mme_runner()
29 { }
30
mme_hw_runner()31 mme_hw_runner::mme_hw_runner() :
32 mme_runner(), p(NULL), dev(NULL), ctx(NULL),
33 data_bo(NULL), push_bo(NULL),
34 syncobj(0),
35 push_map(NULL)
36 {
37 memset(&push, 0, sizeof(push));
38 }
39
40 void
mme_store_data(mme_builder * b,uint32_t dw_idx,mme_value data,bool free_reg)41 mme_runner::mme_store_data(mme_builder *b, uint32_t dw_idx,
42 mme_value data, bool free_reg)
43 {
44 mme_store_imm_addr(b, data_addr + dw_idx * 4, data, free_reg);
45 }
46
~mme_hw_runner()47 mme_hw_runner::~mme_hw_runner()
48 {
49 if (syncobj)
50 drmSyncobjDestroy(dev->fd, syncobj);
51 if (push_bo) {
52 nouveau_ws_bo_unmap(push_bo, push_map);
53 nouveau_ws_bo_destroy(push_bo);
54 }
55 if (ctx)
56 nouveau_ws_context_destroy(ctx);
57 if (dev)
58 nouveau_ws_device_destroy(dev);
59 }
60
/* Size of the push buffer BO in bytes.  Parenthesised so the macro expands
 * as a single value in any expression (e.g. `x % PUSH_SIZE`).
 */
#define PUSH_SIZE (64 * 4096)
62
63 bool
set_up_hw(uint16_t min_cls,uint16_t max_cls)64 mme_hw_runner::set_up_hw(uint16_t min_cls, uint16_t max_cls)
65 {
66 drmDevicePtr devices[8];
67 int max_devices = drmGetDevices2(0, devices, 8);
68
69 int i;
70 for (i = 0; i < max_devices; i++) {
71 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
72 devices[i]->bustype == DRM_BUS_PCI &&
73 devices[i]->deviceinfo.pci->vendor_id == 0x10de) {
74 dev = nouveau_ws_device_new(devices[i]);
75 if (dev == NULL)
76 continue;
77
78 if (dev->info.cls_eng3d < min_cls || dev->info.cls_eng3d > max_cls) {
79 nouveau_ws_device_destroy(dev);
80 dev = NULL;
81 continue;
82 }
83
84 /* Found a Turning+ device */
85 break;
86 }
87 }
88
89 if (dev == NULL)
90 return false;
91
92 devinfo = &dev->info;
93
94 int ret = nouveau_ws_context_create(dev, NOUVEAU_WS_ENGINE_3D, &ctx);
95 if (ret)
96 return false;
97
98 uint32_t data_bo_flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP;
99 data_bo = nouveau_ws_bo_new_mapped(dev, DATA_BO_SIZE, 0,
100 (nouveau_ws_bo_flags)data_bo_flags,
101 NOUVEAU_WS_BO_RDWR, (void **)&data);
102 if (data_bo == NULL)
103 return false;
104
105 memset(data, 139, DATA_BO_SIZE);
106 data_addr = data_bo->offset;
107
108 uint32_t push_bo_flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP;
109 push_bo = nouveau_ws_bo_new_mapped(dev, PUSH_SIZE, 0,
110 (nouveau_ws_bo_flags)push_bo_flags,
111 NOUVEAU_WS_BO_WR, &push_map);
112 if (push_bo == NULL)
113 return false;
114
115 ret = drmSyncobjCreate(dev->fd, 0, &syncobj);
116 if (ret < 0)
117 return false;
118
119 reset_push();
120
121 return true;
122 }
123
124 void
reset_push()125 mme_hw_runner::reset_push()
126 {
127 nv_push_init(&push, (uint32_t *)push_map, PUSH_SIZE / 4);
128 p = &push;
129
130 P_MTHD(p, NV9097, SET_OBJECT);
131 P_NV9097_SET_OBJECT(p, {
132 .class_id = dev->info.cls_eng3d,
133 .engine_id = 0,
134 });
135 }
136
137 void
submit_push()138 mme_hw_runner::submit_push()
139 {
140 struct drm_nouveau_exec_push push = {
141 .va = push_bo->offset,
142 .va_len = (uint32_t)nv_push_dw_count(&this->push) * 4,
143 };
144
145 struct drm_nouveau_sync sync = {
146 .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
147 .handle = syncobj,
148 .timeline_value = 0,
149 };
150
151 struct drm_nouveau_exec req = {
152 .channel = (uint32_t)ctx->channel,
153 .push_count = 1,
154 .sig_count = 1,
155 .sig_ptr = (uintptr_t)&sync,
156 .push_ptr = (uintptr_t)&push,
157 };
158
159 int ret = drmCommandWriteRead(dev->fd, DRM_NOUVEAU_EXEC,
160 &req, sizeof(req));
161 ASSERT_EQ(ret, 0);
162
163 ret = drmSyncobjWait(dev->fd, &syncobj, 1, INT64_MAX,
164 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
165 ASSERT_EQ(ret, 0);
166 }
167
168 void
push_macro(uint32_t id,const std::vector<uint32_t> & macro)169 mme_hw_runner::push_macro(uint32_t id, const std::vector<uint32_t> ¯o)
170 {
171 P_MTHD(p, NV9097, LOAD_MME_START_ADDRESS_RAM_POINTER);
172 P_NV9097_LOAD_MME_START_ADDRESS_RAM_POINTER(p, id);
173 P_NV9097_LOAD_MME_START_ADDRESS_RAM(p, 0);
174 P_1INC(p, NV9097, LOAD_MME_INSTRUCTION_RAM_POINTER);
175 P_NV9097_LOAD_MME_INSTRUCTION_RAM_POINTER(p, 0);
176 P_INLINE_ARRAY(p, ¯o[0], macro.size());
177 }
178
179 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)180 mme_hw_runner::run_macro(const std::vector<uint32_t>& macro,
181 const std::vector<uint32_t>& params)
182 {
183 push_macro(0, macro);
184
185 P_1INC(p, NV9097, CALL_MME_MACRO(0));
186 if (params.empty()) {
187 P_NV9097_CALL_MME_MACRO(p, 0, 0);
188 } else {
189 P_INLINE_ARRAY(p, ¶ms[0], params.size());
190 }
191
192 submit_push();
193 }
194
mme_fermi_sim_runner(uint64_t data_addr)195 mme_fermi_sim_runner::mme_fermi_sim_runner(uint64_t data_addr)
196 {
197 memset(&info, 0, sizeof(info));
198 info.cls_eng3d = FERMI_A;
199
200 memset(data_store, 0, sizeof(data_store));
201
202 this->devinfo = &info;
203 this->data_addr = data_addr,
204 this->data = data_store;
205 }
206
~mme_fermi_sim_runner()207 mme_fermi_sim_runner::~mme_fermi_sim_runner()
208 { }
209
210 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)211 mme_fermi_sim_runner::run_macro(const std::vector<uint32_t>& macro,
212 const std::vector<uint32_t>& params)
213 {
214 std::vector<mme_fermi_inst> insts(macro.size());
215 mme_fermi_decode(&insts[0], ¯o[0], macro.size());
216
217 /* First, make a copy of the data and simulate the macro */
218 mme_fermi_sim_mem sim_mem = {
219 .addr = data_addr,
220 .data = data,
221 .size = DATA_BO_SIZE,
222 };
223 const uint32_t* p_params = params.size() ? ¶ms[0] : NULL;
224 mme_fermi_sim(insts.size(), &insts[0],
225 params.size(), p_params,
226 1, &sim_mem);
227 }
228
mme_tu104_sim_runner(uint64_t data_addr)229 mme_tu104_sim_runner::mme_tu104_sim_runner(uint64_t data_addr)
230 {
231 memset(&info, 0, sizeof(info));
232 info.cls_eng3d = TURING_A;
233
234 memset(data_store, 0, sizeof(data_store));
235
236 this->devinfo = &info;
237 this->data_addr = data_addr,
238 this->data = data_store;
239 }
240
~mme_tu104_sim_runner()241 mme_tu104_sim_runner::~mme_tu104_sim_runner()
242 { }
243
244 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)245 mme_tu104_sim_runner::run_macro(const std::vector<uint32_t>& macro,
246 const std::vector<uint32_t>& params)
247 {
248 std::vector<mme_tu104_inst> insts(macro.size());
249 mme_tu104_decode(&insts[0], ¯o[0], macro.size() / 3);
250
251 /* First, make a copy of the data and simulate the macro */
252 mme_tu104_sim_mem sim_mem = {
253 .addr = data_addr,
254 .data = data,
255 .size = DATA_BO_SIZE,
256 };
257 const uint32_t* p_params = params.size() ? ¶ms[0] : NULL;
258 mme_tu104_sim(insts.size(), &insts[0],
259 params.size(), p_params,
260 1, &sim_mem);
261 }
262