• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Collabora Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "mme_runner.h"
6 
7 #include <fcntl.h>
8 #include <string.h>
9 #include <xf86drm.h>
10 
11 #include "mme_fermi_sim.h"
12 #include "mme_tu104_sim.h"
13 
14 #include "nvk_clc597.h"
15 
16 #include "nouveau_bo.h"
17 #include "nouveau_context.h"
18 
19 /* nouveau_drm.h isn't C++-friendly */
20 #define class cls
21 #include "drm-uapi/nouveau_drm.h"
22 #undef class
23 
mme_runner()24 mme_runner::mme_runner() :
25   devinfo(NULL), data_addr(0), data(NULL)
26 { }
27 
~mme_runner()28 mme_runner::~mme_runner()
29 { }
30 
mme_hw_runner()31 mme_hw_runner::mme_hw_runner() :
32   mme_runner(), p(NULL), dev(NULL), ctx(NULL),
33   data_bo(NULL), push_bo(NULL),
34   syncobj(0),
35   push_map(NULL)
36 {
37    memset(&push, 0, sizeof(push));
38 }
39 
40 void
mme_store_data(mme_builder * b,uint32_t dw_idx,mme_value data,bool free_reg)41 mme_runner::mme_store_data(mme_builder *b, uint32_t dw_idx,
42                            mme_value data, bool free_reg)
43 {
44    mme_store_imm_addr(b, data_addr + dw_idx * 4, data, free_reg);
45 }
46 
~mme_hw_runner()47 mme_hw_runner::~mme_hw_runner()
48 {
49    if (syncobj)
50       drmSyncobjDestroy(dev->fd, syncobj);
51    if (push_bo) {
52       nouveau_ws_bo_unmap(push_bo, push_map);
53       nouveau_ws_bo_destroy(push_bo);
54    }
55    if (ctx)
56       nouveau_ws_context_destroy(ctx);
57    if (dev)
58       nouveau_ws_device_destroy(dev);
59 }
60 
/* Size used when allocating the push BO; divided by 4 to obtain the dword
 * count handed to nv_push_init().  Parenthesized so the macro expands
 * safely inside larger expressions (e.g. `x / PUSH_SIZE`).
 */
#define PUSH_SIZE (64 * 4096)
62 
63 bool
set_up_hw(uint16_t min_cls,uint16_t max_cls)64 mme_hw_runner::set_up_hw(uint16_t min_cls, uint16_t max_cls)
65 {
66    drmDevicePtr devices[8];
67    int max_devices = drmGetDevices2(0, devices, 8);
68 
69    int i;
70    for (i = 0; i < max_devices; i++) {
71       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
72           devices[i]->bustype == DRM_BUS_PCI &&
73           devices[i]->deviceinfo.pci->vendor_id == 0x10de) {
74          dev = nouveau_ws_device_new(devices[i]);
75          if (dev == NULL)
76             continue;
77 
78          if (dev->info.cls_eng3d < min_cls || dev->info.cls_eng3d > max_cls) {
79             nouveau_ws_device_destroy(dev);
80             dev = NULL;
81             continue;
82          }
83 
84          /* Found a Turning+ device */
85          break;
86       }
87    }
88 
89    if (dev == NULL)
90       return false;
91 
92    devinfo = &dev->info;
93 
94    int ret = nouveau_ws_context_create(dev, NOUVEAU_WS_ENGINE_3D, &ctx);
95    if (ret)
96       return false;
97 
98    uint32_t data_bo_flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP;
99    data_bo = nouveau_ws_bo_new_mapped(dev, DATA_BO_SIZE, 0,
100                                       (nouveau_ws_bo_flags)data_bo_flags,
101                                       NOUVEAU_WS_BO_RDWR, (void **)&data);
102    if (data_bo == NULL)
103       return false;
104 
105    memset(data, 139, DATA_BO_SIZE);
106    data_addr = data_bo->offset;
107 
108    uint32_t push_bo_flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP;
109    push_bo = nouveau_ws_bo_new_mapped(dev, PUSH_SIZE, 0,
110                                       (nouveau_ws_bo_flags)push_bo_flags,
111                                       NOUVEAU_WS_BO_WR, &push_map);
112    if (push_bo == NULL)
113       return false;
114 
115    ret = drmSyncobjCreate(dev->fd, 0, &syncobj);
116    if (ret < 0)
117       return false;
118 
119    reset_push();
120 
121    return true;
122 }
123 
124 void
reset_push()125 mme_hw_runner::reset_push()
126 {
127    nv_push_init(&push, (uint32_t *)push_map, PUSH_SIZE / 4);
128    p = &push;
129 
130    P_MTHD(p, NV9097, SET_OBJECT);
131    P_NV9097_SET_OBJECT(p, {
132       .class_id = dev->info.cls_eng3d,
133       .engine_id = 0,
134    });
135 }
136 
137 void
submit_push()138 mme_hw_runner::submit_push()
139 {
140    struct drm_nouveau_exec_push push = {
141       .va = push_bo->offset,
142       .va_len = (uint32_t)nv_push_dw_count(&this->push) * 4,
143    };
144 
145    struct drm_nouveau_sync sync = {
146       .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
147       .handle = syncobj,
148       .timeline_value = 0,
149    };
150 
151    struct drm_nouveau_exec req = {
152       .channel = (uint32_t)ctx->channel,
153       .push_count = 1,
154       .sig_count = 1,
155       .sig_ptr = (uintptr_t)&sync,
156       .push_ptr = (uintptr_t)&push,
157    };
158 
159    int ret = drmCommandWriteRead(dev->fd, DRM_NOUVEAU_EXEC,
160                                  &req, sizeof(req));
161    ASSERT_EQ(ret, 0);
162 
163    ret = drmSyncobjWait(dev->fd, &syncobj, 1, INT64_MAX,
164                         DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
165    ASSERT_EQ(ret, 0);
166 }
167 
168 void
push_macro(uint32_t id,const std::vector<uint32_t> & macro)169 mme_hw_runner::push_macro(uint32_t id, const std::vector<uint32_t> &macro)
170 {
171    P_MTHD(p, NV9097, LOAD_MME_START_ADDRESS_RAM_POINTER);
172    P_NV9097_LOAD_MME_START_ADDRESS_RAM_POINTER(p, id);
173    P_NV9097_LOAD_MME_START_ADDRESS_RAM(p, 0);
174    P_1INC(p, NV9097, LOAD_MME_INSTRUCTION_RAM_POINTER);
175    P_NV9097_LOAD_MME_INSTRUCTION_RAM_POINTER(p, 0);
176    P_INLINE_ARRAY(p, &macro[0], macro.size());
177 }
178 
179 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)180 mme_hw_runner::run_macro(const std::vector<uint32_t>& macro,
181                          const std::vector<uint32_t>& params)
182 {
183    push_macro(0, macro);
184 
185    P_1INC(p, NV9097, CALL_MME_MACRO(0));
186    if (params.empty()) {
187       P_NV9097_CALL_MME_MACRO(p, 0, 0);
188    } else {
189       P_INLINE_ARRAY(p, &params[0], params.size());
190    }
191 
192    submit_push();
193 }
194 
mme_fermi_sim_runner(uint64_t data_addr)195 mme_fermi_sim_runner::mme_fermi_sim_runner(uint64_t data_addr)
196 {
197    memset(&info, 0, sizeof(info));
198    info.cls_eng3d = FERMI_A;
199 
200    memset(data_store, 0, sizeof(data_store));
201 
202    this->devinfo = &info;
203    this->data_addr = data_addr,
204    this->data = data_store;
205 }
206 
~mme_fermi_sim_runner()207 mme_fermi_sim_runner::~mme_fermi_sim_runner()
208 { }
209 
210 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)211 mme_fermi_sim_runner::run_macro(const std::vector<uint32_t>& macro,
212                                 const std::vector<uint32_t>& params)
213 {
214    std::vector<mme_fermi_inst> insts(macro.size());
215    mme_fermi_decode(&insts[0], &macro[0], macro.size());
216 
217    /* First, make a copy of the data and simulate the macro */
218    mme_fermi_sim_mem sim_mem = {
219       .addr = data_addr,
220       .data = data,
221       .size = DATA_BO_SIZE,
222    };
223    const uint32_t* p_params = params.size() ? &params[0] : NULL;
224    mme_fermi_sim(insts.size(), &insts[0],
225                  params.size(), p_params,
226                  1, &sim_mem);
227 }
228 
mme_tu104_sim_runner(uint64_t data_addr)229 mme_tu104_sim_runner::mme_tu104_sim_runner(uint64_t data_addr)
230 {
231    memset(&info, 0, sizeof(info));
232    info.cls_eng3d = TURING_A;
233 
234    memset(data_store, 0, sizeof(data_store));
235 
236    this->devinfo = &info;
237    this->data_addr = data_addr,
238    this->data = data_store;
239 }
240 
~mme_tu104_sim_runner()241 mme_tu104_sim_runner::~mme_tu104_sim_runner()
242 { }
243 
244 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)245 mme_tu104_sim_runner::run_macro(const std::vector<uint32_t>& macro,
246                                const std::vector<uint32_t>& params)
247 {
248    std::vector<mme_tu104_inst> insts(macro.size());
249    mme_tu104_decode(&insts[0], &macro[0], macro.size() / 3);
250 
251    /* First, make a copy of the data and simulate the macro */
252    mme_tu104_sim_mem sim_mem = {
253       .addr = data_addr,
254       .data = data,
255       .size = DATA_BO_SIZE,
256    };
257    const uint32_t* p_params = params.size() ? &params[0] : NULL;
258    mme_tu104_sim(insts.size(), &insts[0],
259                  params.size(), p_params,
260                  1, &sim_mem);
261 }
262