/*
 * Copyright © 2015 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "tu_util.h"

#include <errno.h>
#include <stdarg.h>

#include "util/u_math.h"
#include "util/timespec.h"
#include "vk_enum_to_str.h"

#include "tu_device.h"
#include "tu_pass.h"

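/* Log a FINISHME message for a code path that is not implemented yet,
 * prefixed with the file and line that reported it.
 */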
void PRINTFLIKE(3, 4)
__tu_finishme(const char *file, int line, const char *format, ...)
{
   va_list ap;
   char buffer[256];

   va_start(ap, format);
   vsnprintf(buffer, sizeof(buffer), format, ap);
   va_end(ap);

   mesa_loge("%s:%d: FINISHME: %s\n", file, line, buffer);
}

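/* Log a startup-time error together with the stringified VkResult and
 * return the error so callers can propagate it. Outside of debug builds
 * the message is only printed when always_print is set.
 */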
VkResult
__vk_startup_errorf(struct tu_instance *instance,
                    VkResult error,
                    bool always_print,
                    const char *file,
                    int line,
                    const char *format,
                    ...)
{
   va_list ap;
   char buffer[256];

   const char *error_str = vk_Result_to_str(error);

#ifndef DEBUG
   if (!always_print)
      return error;
#endif

   if (format) {
      va_start(ap, format);
      vsnprintf(buffer, sizeof(buffer), format, ap);
      va_end(ap);

      mesa_loge("%s:%d: %s (%s)\n", file, line, buffer, error_str);
   } else {
      mesa_loge("%s:%d: %s\n", file, line, error_str);
   }

   return error;
}

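/* Compute the tile size (tile0) and tile count for the given GMEM layout:
 * start from a single tile covering the framebuffer, then keep splitting
 * along width or height until the tile respects the hardware's maximum
 * tile dimensions and fits into the render pass' GMEM pixel budget.
 */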
static void
tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
                                    const struct tu_device *dev,
                                    const struct tu_render_pass *pass,
                                    enum tu_gmem_layout gmem_layout)
{
   const uint32_t tile_align_w = pass->tile_align_w;
   const uint32_t tile_align_h = dev->physical_device->info->tile_align_h;
   const uint32_t max_tile_width = dev->physical_device->info->tile_max_w;
   const uint32_t max_tile_height = dev->physical_device->info->tile_max_h;
   struct tu_tiling_config *tiling = &fb->tiling[gmem_layout];

   /* start from 1 tile */
   tiling->tile_count = (VkExtent2D) {
      .width = 1,
      .height = 1,
   };
   tiling->tile0 = (VkExtent2D) {
      .width = util_align_npot(fb->width, tile_align_w),
      .height = align(fb->height, tile_align_h),
   };

   /* will force to sysmem, don't bother trying to have a valid tile config
    * TODO: just skip all GMEM stuff when sysmem is forced?
    */
   if (!pass->gmem_pixels[gmem_layout])
      return;

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
      /* start with 2x2 tiles */
      tiling->tile_count.width = 2;
      tiling->tile_count.height = 2;
      tiling->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w);
      tiling->tile0.height = align(DIV_ROUND_UP(fb->height, 2), tile_align_h);
   }

   /* do not exceed max tile width */
   while (tiling->tile0.width > max_tile_width) {
      tiling->tile_count.width++;
      tiling->tile0.width =
         util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
   }

   /* do not exceed max tile height */
   while (tiling->tile0.height > max_tile_height) {
      tiling->tile_count.height++;
      tiling->tile0.height =
         util_align_npot(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
   }

   /* do not exceed gmem size */
   while (tiling->tile0.width * tiling->tile0.height > pass->gmem_pixels[gmem_layout]) {
      if (tiling->tile0.width > MAX2(tile_align_w, tiling->tile0.height)) {
         tiling->tile_count.width++;
         tiling->tile0.width =
            util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
      } else {
         /* if this assert fails then layout is impossible.. */
         assert(tiling->tile0.height > tile_align_h);
         tiling->tile_count.height++;
         tiling->tile0.height =
            align(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
      }
   }
}

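/* Group the tiles into VSC pipes: start with one tile per pipe and grow
 * pipe0 along its smaller dimension until no more than max_pipe_count
 * pipes are needed to cover all tiles.
 */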
static void
tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
                                    const struct tu_device *dev)
{
   const uint32_t max_pipe_count = 32; /* A6xx */

   /* start from 1 tile per pipe */
   tiling->pipe0 = (VkExtent2D) {
      .width = 1,
      .height = 1,
   };
   tiling->pipe_count = tiling->tile_count;

   while (tiling->pipe_count.width * tiling->pipe_count.height > max_pipe_count) {
      if (tiling->pipe0.width < tiling->pipe0.height) {
         tiling->pipe0.width += 1;
         tiling->pipe_count.width =
            DIV_ROUND_UP(tiling->tile_count.width, tiling->pipe0.width);
      } else {
         tiling->pipe0.height += 1;
         tiling->pipe_count.height =
            DIV_ROUND_UP(tiling->tile_count.height, tiling->pipe0.height);
      }
   }
}

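/* Fill in the per-pipe VSC_PIPE_CONFIG register values (position and size
 * in tiles) for every used pipe; pipes on the right/bottom edge get the
 * remainder size, and unused config slots are zeroed.
 */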
static void
tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
                              const struct tu_device *dev)
{
   const uint32_t max_pipe_count = 32; /* A6xx */
   const uint32_t used_pipe_count =
      tiling->pipe_count.width * tiling->pipe_count.height;
   const VkExtent2D last_pipe = {
      .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
      .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
   };

   assert(used_pipe_count <= max_pipe_count);
   assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));

   for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
      for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
         const uint32_t pipe_x = tiling->pipe0.width * x;
         const uint32_t pipe_y = tiling->pipe0.height * y;
         const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
                                    ? last_pipe.width
                                    : tiling->pipe0.width;
         const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
                                    ? last_pipe.height
                                    : tiling->pipe0.height;
         const uint32_t n = tiling->pipe_count.width * y + x;

         tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
                                  A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
                                  A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
                                  A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
         tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
      }
   }

   memset(tiling->pipe_config + used_pipe_count, 0,
          sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
}

static bool
is_hw_binning_possible(const struct tu_tiling_config *tiling)
{
   /* Similar to older gens, # of tiles per pipe cannot be more than 32.
    * But there are no hangs with 16 or more tiles per pipe in either
    * X or Y direction, so that limit does not seem to apply.
    */
   uint32_t tiles_per_pipe = tiling->pipe0.width * tiling->pipe0.height;
   return tiles_per_pipe <= 32;
}

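/* Decide whether HW binning will actually be used: it must be possible for
 * this config and is only worthwhile with more than two tiles, subject to
 * the TU_DEBUG_FORCEBIN/TU_DEBUG_NOBIN overrides.
 */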
static void
tu_tiling_config_update_binning(struct tu_tiling_config *tiling, const struct tu_device *device)
{
   tiling->binning_possible = is_hw_binning_possible(tiling);

   if (tiling->binning_possible) {
      tiling->binning = (tiling->tile_count.width * tiling->tile_count.height) > 2;

      if (unlikely(device->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN))
         tiling->binning = true;
      if (unlikely(device->physical_device->instance->debug_flags &
                   TU_DEBUG_NOBIN))
         tiling->binning = false;
   } else {
      tiling->binning = false;
   }
}

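/* Compute the full tiling configuration (tile layout, pipe layout, per-pipe
 * registers and the binning decision) for every GMEM layout of the
 * framebuffer.
 */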
void
tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
                             const struct tu_device *device,
                             const struct tu_render_pass *pass)
{
   for (int gmem_layout = 0; gmem_layout < TU_GMEM_LAYOUT_COUNT; gmem_layout++) {
      struct tu_tiling_config *tiling = &fb->tiling[gmem_layout];
      tu_tiling_config_update_tile_layout(fb, device, pass, gmem_layout);
      tu_tiling_config_update_pipe_layout(tiling, device);
      tu_tiling_config_update_pipes(tiling, device);
      tu_tiling_config_update_binning(tiling, device);
   }
}

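/* Log, at most once per second, how many GMEM loads and stores were skipped
 * relative to the totals, based on the debug counters accumulated in the
 * global BO.
 */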
void
tu_dbg_log_gmem_load_store_skips(struct tu_device *device)
{
   static uint32_t last_skipped_loads = 0;
   static uint32_t last_skipped_stores = 0;
   static uint32_t last_total_loads = 0;
   static uint32_t last_total_stores = 0;
   static struct timespec last_time = {};

   pthread_mutex_lock(&device->submit_mutex);

   struct timespec current_time;
   clock_gettime(CLOCK_MONOTONIC, &current_time);

   if (timespec_sub_to_nsec(&current_time, &last_time) > 1000 * 1000 * 1000) {
      last_time = current_time;
   } else {
      pthread_mutex_unlock(&device->submit_mutex);
      return;
   }

   struct tu6_global *global = device->global_bo->map;

   uint32_t current_taken_loads = global->dbg_gmem_taken_loads;
   uint32_t current_taken_stores = global->dbg_gmem_taken_stores;
   uint32_t current_total_loads = global->dbg_gmem_total_loads;
   uint32_t current_total_stores = global->dbg_gmem_total_stores;

   uint32_t skipped_loads = current_total_loads - current_taken_loads;
   uint32_t skipped_stores = current_total_stores - current_taken_stores;

   uint32_t current_time_frame_skipped_loads = skipped_loads - last_skipped_loads;
   uint32_t current_time_frame_skipped_stores = skipped_stores - last_skipped_stores;

   uint32_t current_time_frame_total_loads = current_total_loads - last_total_loads;
   uint32_t current_time_frame_total_stores = current_total_stores - last_total_stores;

   mesa_logi("[GMEM] loads total: %u skipped: %.1f%%\n",
         current_time_frame_total_loads,
         current_time_frame_skipped_loads / (float) current_time_frame_total_loads * 100.f);
   mesa_logi("[GMEM] stores total: %u skipped: %.1f%%\n",
         current_time_frame_total_stores,
         current_time_frame_skipped_stores / (float) current_time_frame_total_stores * 100.f);

   last_skipped_loads = skipped_loads;
   last_skipped_stores = skipped_stores;
   last_total_loads = current_total_loads;
   last_total_stores = current_total_stores;

   pthread_mutex_unlock(&device->submit_mutex);
}