1 /*
2 * Copyright © 2021 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "iris_batch.h"
25 #include "iris_context.h"
26 #include "iris_utrace.h"
27
28 #include "util/u_trace_gallium.h"
29
30 #include "ds/intel_driver_ds.h"
31
32 #ifdef MAJOR_IN_MKDEV
33 #include <sys/mkdev.h>
34 #endif
35 #ifdef MAJOR_IN_SYSMACROS
36 #include <sys/sysmacros.h>
37 #endif
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <unistd.h>
41
/**
 * One slot of the utrace timestamp buffer.
 *
 * A slot is 16 bytes so that it can hold either of the two layouts below.
 */
union iris_utrace_timestamp {
   /* Timestamp written by either 2 * MI_STORE_REGISTER_MEM or
    * PIPE_CONTROL. Only the first 8 bytes of the slot are used.
    */
   uint64_t timestamp;

   /* Timestamps written by COMPUTE_WALKER::PostSync.
    *
    * The layout is described in the PRMs.
    * ATSM PRMs, Volume 2d: Command Reference: Structures, POSTSYNC_DATA:
    *
    *    "The timestamp layout :
    *       [0] = 32b Context Timestamp Start
    *       [1] = 32b Global Timestamp Start
    *       [2] = 32b Context Timestamp End
    *       [3] = 32b Global Timestamp End"
    */
   uint32_t compute_walker[4];
};
62
63 static void *
iris_utrace_create_ts_buffer(struct u_trace_context * utctx,uint32_t size)64 iris_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
65 {
66 struct iris_context *ice =
67 container_of(utctx, struct iris_context, ds.trace_context);
68 struct pipe_context *ctx = &ice->ctx;
69 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
70 uint32_t iris_size =
71 (size / sizeof(uint64_t)) * sizeof(union iris_utrace_timestamp);
72
73 struct iris_bo *bo =
74 iris_bo_alloc(screen->bufmgr, "utrace timestamps",
75 iris_size, 16 /* alignment */,
76 IRIS_MEMZONE_OTHER,
77 BO_ALLOC_COHERENT | BO_ALLOC_SMEM);
78
79 void *ptr = iris_bo_map(NULL, bo, MAP_READ | MAP_WRITE);
80 memset(ptr, 0, iris_size);
81
82 return bo;
83 }
84
/**
 * u_trace callback: release a timestamp buffer.
 *
 * \p timestamps is the iris_bo handle returned by
 * iris_utrace_create_ts_buffer(); one reference on it is dropped here.
 */
static void
iris_utrace_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps)
{
   iris_bo_unreference((struct iris_bo *)timestamps);
}
91
92 static void
iris_utrace_record_ts(struct u_trace * trace,void * cs,void * timestamps,unsigned idx,bool end_of_pipe)93 iris_utrace_record_ts(struct u_trace *trace, void *cs,
94 void *timestamps, unsigned idx,
95 bool end_of_pipe)
96 {
97 struct iris_batch *batch = container_of(trace, struct iris_batch, trace);
98 struct iris_context *ice = batch->ice;
99 struct iris_bo *bo = timestamps;
100 uint32_t ts_offset = idx * sizeof(union iris_utrace_timestamp);
101
102 iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE);
103
104 const bool is_end_compute =
105 (cs == NULL && ice->utrace.last_compute_walker != NULL && end_of_pipe);
106 if (is_end_compute) {
107 batch->screen->vtbl.rewrite_compute_walker_pc(
108 batch, ice->utrace.last_compute_walker, bo, ts_offset);
109 ice->utrace.last_compute_walker = NULL;
110 } else if (end_of_pipe) {
111 iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
112 PIPE_CONTROL_WRITE_TIMESTAMP,
113 bo, ts_offset, 0ull);
114 } else {
115 batch->screen->vtbl.store_register_mem64(batch, 0x2358,
116 bo, ts_offset,
117 false);
118 }
119 }
120
121 static uint64_t
iris_utrace_read_ts(struct u_trace_context * utctx,void * timestamps,unsigned idx,void * flush_data)122 iris_utrace_read_ts(struct u_trace_context *utctx,
123 void *timestamps, unsigned idx, void *flush_data)
124 {
125 struct iris_context *ice =
126 container_of(utctx, struct iris_context, ds.trace_context);
127 struct pipe_context *ctx = &ice->ctx;
128 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
129 struct iris_bo *bo = timestamps;
130
131 if (idx == 0)
132 iris_bo_wait_rendering(bo);
133
134 union iris_utrace_timestamp *ts = iris_bo_map(NULL, bo, MAP_READ);
135
136 /* Don't translate the no-timestamp marker: */
137 if (ts[idx].timestamp == U_TRACE_NO_TIMESTAMP)
138 return U_TRACE_NO_TIMESTAMP;
139
140 /* Detect a 16bytes timestamp write */
141 if (ts[idx].compute_walker[2] != 0 || ts[idx].compute_walker[3] != 0) {
142 /* The timestamp written by COMPUTE_WALKER::PostSync only as 32bits. We
143 * need to rebuild the full 64bits using the previous timestamp. We
144 * assume that utrace is reading the timestamp in order. Anyway
145 * timestamp rollover on 32bits in a few minutes so in most cases that
146 * should be correct.
147 */
148 uint64_t timestamp =
149 (ice->utrace.last_full_timestamp & 0xffffffff00000000) |
150 (uint64_t) ts[idx].compute_walker[3];
151
152 return intel_device_info_timebase_scale(screen->devinfo, timestamp);
153 }
154
155 ice->utrace.last_full_timestamp = ts[idx].timestamp;
156
157 return intel_device_info_timebase_scale(screen->devinfo, ts[idx].timestamp);
158 }
159
/**
 * u_trace callback: release per-flush data.
 *
 * \p flush_data is heap memory and is handed straight to free(); \p utctx
 * is not used.
 */
static void
iris_utrace_delete_flush_data(struct u_trace_context *utctx,
                              void *flush_data)
{
   (void) utctx;

   free(flush_data);
}
166
iris_utrace_flush(struct iris_batch * batch,uint64_t submission_id)167 void iris_utrace_flush(struct iris_batch *batch, uint64_t submission_id)
168 {
169 struct intel_ds_flush_data *flush_data = malloc(sizeof(*flush_data));
170 intel_ds_flush_data_init(flush_data, &batch->ds, submission_id);
171 intel_ds_queue_flush_data(&batch->ds, &batch->trace, flush_data, false);
172 }
173
iris_utrace_init(struct iris_context * ice)174 void iris_utrace_init(struct iris_context *ice)
175 {
176 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
177
178 struct stat st;
179 uint32_t minor;
180
181 if (fstat(screen->fd, &st) == 0)
182 minor = minor(st.st_rdev);
183 else
184 minor = 0;
185
186 intel_ds_device_init(&ice->ds, screen->devinfo, screen->fd, minor,
187 INTEL_DS_API_OPENGL);
188
189 u_trace_context_init(&ice->ds.trace_context, &ice->ctx,
190 iris_utrace_create_ts_buffer,
191 iris_utrace_delete_ts_buffer,
192 iris_utrace_record_ts,
193 iris_utrace_read_ts,
194 iris_utrace_delete_flush_data);
195
196 for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
197 intel_ds_device_init_queue(&ice->ds, &ice->batches[i].ds, "%s",
198 iris_batch_name_to_string(i));
199 }
200 }
201
iris_utrace_fini(struct iris_context * ice)202 void iris_utrace_fini(struct iris_context *ice)
203 {
204 intel_ds_device_fini(&ice->ds);
205 }
206
207 enum intel_ds_stall_flag
iris_utrace_pipe_flush_bit_to_ds_stall_flag(uint32_t flags)208 iris_utrace_pipe_flush_bit_to_ds_stall_flag(uint32_t flags)
209 {
210 static const struct {
211 uint32_t iris;
212 enum intel_ds_stall_flag ds;
213 } iris_to_ds_flags[] = {
214 { .iris = PIPE_CONTROL_DEPTH_CACHE_FLUSH, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
215 { .iris = PIPE_CONTROL_DATA_CACHE_FLUSH, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
216 { .iris = PIPE_CONTROL_TILE_CACHE_FLUSH, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
217 { .iris = PIPE_CONTROL_RENDER_TARGET_FLUSH, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
218 { .iris = PIPE_CONTROL_STATE_CACHE_INVALIDATE, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
219 { .iris = PIPE_CONTROL_CONST_CACHE_INVALIDATE, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
220 { .iris = PIPE_CONTROL_VF_CACHE_INVALIDATE, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
221 { .iris = PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
222 { .iris = PIPE_CONTROL_INSTRUCTION_INVALIDATE, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
223 { .iris = PIPE_CONTROL_DEPTH_STALL, .ds = INTEL_DS_DEPTH_STALL_BIT, },
224 { .iris = PIPE_CONTROL_CS_STALL, .ds = INTEL_DS_CS_STALL_BIT, },
225 { .iris = PIPE_CONTROL_FLUSH_HDC, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
226 { .iris = PIPE_CONTROL_STALL_AT_SCOREBOARD, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
227 { .iris = PIPE_CONTROL_UNTYPED_DATAPORT_CACHE_FLUSH, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, },
228 { .iris = PIPE_CONTROL_CCS_CACHE_FLUSH, .ds = INTEL_DS_CCS_CACHE_FLUSH_BIT, },
229 };
230
231 enum intel_ds_stall_flag ret = 0;
232 for (uint32_t i = 0; i < ARRAY_SIZE(iris_to_ds_flags); i++) {
233 if (iris_to_ds_flags[i].iris & flags)
234 ret |= iris_to_ds_flags[i].ds;
235 }
236
237 assert(ret != 0);
238
239 return ret;
240 }
241