• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef __FREEDRENO_GUARDBAND_H__
25 #define __FREEDRENO_GUARDBAND_H__
26 
27 #include <math.h>
28 #include <stdbool.h>
29 #include <assert.h>
30 
31 static inline unsigned
fd_calc_guardband(float offset,float scale,bool is_a3xx)32 fd_calc_guardband(float offset, float scale, bool is_a3xx)
33 {
34 	/* On a3xx, the viewport max is 4k and the docs say the max guardband
35 	 * width is 8k. That is, GRAS cannot handle triangle coordinates more than
36 	 * 8k, positive or negative. On a4xx+ the viewport width was bumped to
37 	 * 16k, and so the guardband width was necessarily also bumped. Note that
38 	 * the numbers here should correspond to
39 	 * VkPhysicalDeviceLimits::viewportBoundsRange in Vulkan.
40 	 */
41 	const float gb_min = is_a3xx ? -8192. : -32768.;
42 	const float gb_max = is_a3xx ?  8191. :  32767.;
43 
44 	/* Clipping happens in normalized device coordinates, so we have to
45 	 * transform gb_min and gb_max to ndc using the inverse of the viewport
46 	 * transform. Avoid flipping min and max by using the absolute value of
47 	 * the scale.
48 	 */
49 	const float gb_min_ndc = (gb_min - offset) / fabsf(scale);
50 	const float gb_max_ndc = (gb_max - offset) / fabsf(scale);
51 
52 	/* There's only one GB_ADJ field, so presumably the guardband is
53 	 * [-GB_ADJ, GB_ADJ] like on Radeon. It's always safe to make the
54 	 * guardband smaller, so we have to take the min to get the largest range
55 	 * contained in [gb_min_ndc, gb_max_ndc].
56 	 */
57 	const float gb_adj = fminf(-gb_min_ndc, gb_max_ndc);
58 
59 	/* The viewport should always be contained in the guardband. */
60 	assert(gb_adj >= 1.0);
61 
62 	/* frexp returns an unspecified value if given an infinite value, which
63 	 * can happen if scale == 0.
64 	 */
65 	if (isinf(gb_adj))
66 		return 0x1ff;
67 
68 	/* Convert gb_adj to 3.6 floating point, rounding down since it's always
69 	 * safe to make the guard band smaller (but not the other way around!).
70 	 *
71 	 * Note: After converting back to a float, the value the blob returns here
72 	 * is sometimes a little smaller than the value we return. This seems to
73 	 * happen around the boundary between two different rounded values. For
74 	 * example, using the a6xx blob:
75 	 *
76 	 * min  | width  | unrounded gb_adj | blob result | mesa result
77 	 * ------------------------------------------------------------
78 	 * 0    | 510    |          127.498 |        127. |        127.
79 	 * 0    | 511    |          127.247 |        126. |        127.
80 	 * 0    | 512    |          126.996 |        126. |        126.
81 	 *
82 	 * The guardband must be 32767 wide, since that's what the blob reports
83 	 * for viewportBoundsRange, so I'm guessing that they're rounding slightly
84 	 * more conservatively somehow.
85 	 */
86 	int gb_adj_exp;
87 	float gb_adj_mantissa = frexpf(gb_adj, &gb_adj_exp);
88 	assert(gb_adj_exp > 0);
89 
90 	/* Round non-representable numbers down to the largest possible number. */
91 	if (gb_adj_exp > 8)
92 		return 0x1ff;
93 
94 	return ((gb_adj_exp - 1) << 6) |
95 		((unsigned) truncf(gb_adj_mantissa * (1 << 7)) - (1 << 6));
96 }
97 
98 #endif /* __FREEDRENO_GUARDBAND_H__ */
99