1 /* 2 * Copyright 2016 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can 5 * be found in the LICENSE file. 6 * 7 */ 8 9 #ifndef SKC_ONCE_TILE 10 #define SKC_ONCE_TILE 11 12 // 13 // 14 // 15 16 #include "macros.h" 17 18 // 19 // Hard requirements: 20 // 21 // - A TTXB "block pool" extent that is at least 1GB. 22 // 23 // - A virtual surface of at least 8K x 8K 24 // 25 // - A physical surface of __don't really care__ because it's 26 // advantageous to tile the physical surface since it's likely 27 // to shrink the post-place TTCK sorting step. 28 // 29 // 30 // EXTENT TTXB BITS 31 // SIZE (MB) +------------------------------------+ 32 // | 22 23 24 25 26 27 | 33 // +----+------------------------------------+ 34 // | 8 | 128 256 512 1024 2048 4096 | 35 // TTXB | 16 | 256 512 1024 2048 4096 8192 | 36 // WORDS | 32 | 512 1024 2048 4096 8192 16384 | 37 // | 64 | 1024 2048 4096 8192 16384 32768 | 38 // +----+------------------------------------+ 39 // 40 // 41 // SURF X/Y BITS 42 // TILE +------------------------------------------------------+ 43 // | 5 6 7 8 9 10 11 12 13 | 44 // +----+------------------------------------------------------+ 45 // | 3 | 256 512 1024 2048 4096 8192 16384 32768 65536 | 46 // TILE | 4 | 512 1024 2048 4096 8192 16384 32768 65536 128K | 47 // SIDE | 5 | 1024 2048 4096 8192 16384 32768 65536 128K 256K | 48 // BITS | 6 | 2048 4096 8192 16384 32768 65536 128K 256K 512K | 49 // | 7 | 4096 8192 16384 32768 65536 128K 256K 512K 1024K | 50 // +----+------------------------------------------------------+ 51 // TILES^2 | 1024 4096 16384 65536 256K 1M 4M 16M 64M | 52 // +------------------------------------------------------+ 53 // 54 // The following values should be pretty future-proof across all GPUs: 55 // 56 // - The minimum addressable subblock size is 16 words (64 bytes) to 57 // ensure there is enough space for a path or raster header and 58 // its payload. 
//
// - Blocks are power-of-2 multiples of subblocks. Larger blocks can
//   reduce allocation activity (fewer atomic adds).
//
// - 27 bits of TTXB_ID space implies a max of 4GB-32GB of
//   rasterized paths depending on the size of the TTXB block.
//   This could enable interesting use cases.
//
// - A virtual rasterization surface that's from +/-16K to +/-128K
//   depending on the size of the TTXB block.
//
// - Keys that (optionally) only require a 32-bit high word
//   comparison.
//
// - Support for a minimum of 256K layers. This can be practically
//   raised to 1M or 2M layers.
//

//
// TTRK (64-BIT COMPARE)
//
//  0                                         63
//  | TTSB ID |   X  |   Y  | RASTER COHORT ID |
//  +---------+------+------+------------------+
//  |    27   |  12  |  12  |        13        |
//
//
// TTRK (32-BIT COMPARE) ( DEFAULT )
//
//  0                                               63
//  | TTSB ID | N/A |   X  |   Y  | RASTER COHORT ID |
//  +---------+-----+------+------+------------------+
//  |    27   |  5  |  12  |  12  |        8         |
//
//
// TTSK v1
//
//  0                                     63
//  | TTSB ID | IS_PREFIX |  N/A | X  | Y  |
//  +---------+-----------+------+----+----+
//  |    27   |   1 (=0)  |  12  | 12 | 12 |
//
//
// TTPK v2 ( DEFAULT )
//
//  0                                       63
//  | TTPB ID | IS_PREFIX | SPAN |  X  |  Y  |
//  +---------+-----------+------+-----+-----+
//  |    27   |   1 (=1)  |  12  | 12  | 12  |
//
//
// TTCK (32-BIT COMPARE) v1
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          30          |    1   |    1   |   18  |  7  |  7  |
//
//
// TTCK (32-BIT COMPARE) v2
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          30          |    1   |    1   |   15  |  9  |  8  |
//
//
// TTCK (64-BIT COMPARE) -- achieves 4K x 4K with an 8x16 tile ( DEFAULT )
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          27          |    1   |    1   |   18  |  9  |  8  |
//

//
// SUBPIXEL RESOLUTION (LOG2)
//
// log2 of the number of subpixels per pixel along each axis.
//

#define SKC_SUBPIXEL_RESL_X_LOG2    5
#define SKC_SUBPIXEL_RESL_Y_LOG2    5

//
// FIXME -- COMMON -- HOIST ELSEWHERE
//
// NOTE(review): SKC_TILE_WIDTH_LOG2 and SKC_TILE_HEIGHT_LOG2 are not
// defined in this header; presumably they come from a per-platform
// configuration -- confirm before including this file standalone.
//

#define SKC_TILE_WIDTH              (1 << SKC_TILE_WIDTH_LOG2)
#define SKC_TILE_HEIGHT             (1 << SKC_TILE_HEIGHT_LOG2)

// Subpixels per pixel: 1 << 5 == 32 on both axes.  This is the single
// definition of these two macros in this header.
#define SKC_SUBPIXEL_RESL_X         (1 << SKC_SUBPIXEL_RESL_X_LOG2)
#define SKC_SUBPIXEL_RESL_Y         (1 << SKC_SUBPIXEL_RESL_Y_LOG2)

//
// PLATFORM SURFACE TILE SIZE
//
// NOTE(review): SKC_BITS_TO_MASK() is not defined here; presumably it
// is provided by "macros.h" and yields a mask of the low N bits.
//

#define SKC_TILE_WIDTH_MASK         SKC_BITS_TO_MASK(SKC_TILE_WIDTH_LOG2)
#define SKC_TILE_HEIGHT_MASK        SKC_BITS_TO_MASK(SKC_TILE_HEIGHT_LOG2)

//
// TILE SUBPIXEL RESOLUTION
//
// SKC_SUBPIXEL_RESL_X / SKC_SUBPIXEL_RESL_Y used below are defined once
// in the COMMON section above; they are intentionally not repeated here
// so the two copies can never drift apart.
//

#define SKC_SUBPIXEL_MASK_X         SKC_BITS_TO_MASK(SKC_SUBPIXEL_RESL_X_LOG2)
#define SKC_SUBPIXEL_MASK_Y         SKC_BITS_TO_MASK(SKC_SUBPIXEL_RESL_Y_LOG2)

#define SKC_SUBPIXEL_RESL_X_F32     ((float)(SKC_SUBPIXEL_RESL_X))
#define SKC_SUBPIXEL_RESL_Y_F32     ((float)(SKC_SUBPIXEL_RESL_Y))

// pixel -> subpixel scale factors
#define SKC_SUBPIXEL_X_SCALE_UP     SKC_SUBPIXEL_RESL_X_F32
#define SKC_SUBPIXEL_Y_SCALE_UP     SKC_SUBPIXEL_RESL_Y_F32

// subpixel -> pixel scale factors
#define SKC_SUBPIXEL_X_SCALE_DOWN   (1.0f / SKC_SUBPIXEL_RESL_X_F32)
#define SKC_SUBPIXEL_Y_SCALE_DOWN   (1.0f / SKC_SUBPIXEL_RESL_Y_F32)

//
// SUBTILE RESOLUTION
//
// Number of subpixels spanned by one tile along each axis
// (tile size in pixels times subpixels per pixel).
//

#define SKC_SUBTILE_RESL_X_LOG2     (SKC_TILE_WIDTH_LOG2  + SKC_SUBPIXEL_RESL_X_LOG2)
#define SKC_SUBTILE_RESL_Y_LOG2     (SKC_TILE_HEIGHT_LOG2 + SKC_SUBPIXEL_RESL_Y_LOG2)

#define SKC_SUBTILE_RESL_X          (1 << SKC_SUBTILE_RESL_X_LOG2)
#define SKC_SUBTILE_RESL_Y          (1 << SKC_SUBTILE_RESL_Y_LOG2)

#define SKC_SUBTILE_MASK_X          SKC_BITS_TO_MASK(SKC_SUBTILE_RESL_X_LOG2)
#define SKC_SUBTILE_MASK_Y          SKC_BITS_TO_MASK(SKC_SUBTILE_RESL_Y_LOG2)

#define SKC_SUBTILE_RESL_X_F32      ((float)(SKC_SUBTILE_RESL_X))
#define SKC_SUBTILE_RESL_Y_F32      ((float)(SKC_SUBTILE_RESL_Y))

#define SKC_SUBTILE_X_SCALE_DOWN    (1.0f / SKC_SUBTILE_RESL_X_F32)
#define SKC_SUBTILE_Y_SCALE_DOWN    (1.0f / SKC_SUBTILE_RESL_Y_F32)

//
// TILE OFFSET / SPAN
//
// NOTE(review): SKC_TTSK_BITS_X and SKC_TTSK_BITS_Y are not defined
// anywhere in this header (the key layouts further down use
// SKC_TTXK_HI_BITS_X/Y for the X/Y fields).  The *_U32 macros below
// only compile where those names are provided -- confirm.
//
// The *_OFFSET_F32 macros are deliberately the integer constant 0; the
// float conversion of the U32 offset is left in the trailing comment.
//

#define SKC_TILE_X_OFFSET_U32       (1 << (SKC_TTSK_BITS_X-1))
#define SKC_TILE_X_SPAN_U32         (1 << (SKC_TTSK_BITS_X))   // exclusive

#define SKC_TILE_Y_OFFSET_U32       (1 << (SKC_TTSK_BITS_Y-1))
#define SKC_TILE_Y_SPAN_U32         (1 << (SKC_TTSK_BITS_Y))   // exclusive

#define SKC_TILE_X_OFFSET_F32       0 // ((float)SKC_TILE_X_OFFSET_U32)
#define SKC_TILE_X_SPAN_F32         ((float)SKC_TILE_X_SPAN_U32)

#define SKC_TILE_Y_OFFSET_F32       0 // ((float)SKC_TILE_Y_OFFSET_U32)
#define SKC_TILE_Y_SPAN_F32         ((float)SKC_TILE_Y_SPAN_U32)

//
// TILE TRACE SUBPIXEL, PREFIX & COMPOSITION KEYS
//
// These keys are purposefully 64 bits wide so they can be sorted with
// Hotsort's 32:32 or 64-bit implementation.
//
// Tiles are 32x32 on CUDA but can be made rectangular or smaller to
// fit other architectures.
//
//   TW   : tile width
//   TH   : tile height
//
//   TTS  : tile trace subpixel
//   TTSB : tile trace subpixel block
//   TTRK : tile trace subpixel key while in raster cohort
//   TTSK : tile trace subpixel key
//
//   TTP  : tile trace prefix
//   TTPB : tile trace prefix block
//   TTPK : tile trace prefix key
//
//   TTCK : tile trace composition key
//

//
// TILE TRACE SUBPIXEL
//
// The subpixels are encoded with either absolute tile coordinates
// (32-bits) or packed in delta-encoded form.
//
// For 32-bit subpixel packing of a 32x32 or smaller tile:
//
// A tile X is encoded as:
//
//   TX : 10 : unsigned min(x0,x1) tile subpixel coordinate.
//
//   SX :  6 : unsigned subpixel span from min to max x with range
//             [0,32]. The original direction is not captured. Would
//             be nice to capture dx but not necessary right now but
//             could be in the future. <--- SPARE VALUES AVAILABLE
//
// A tile Y is encoded as:
//
//   TY : 10 : unsigned min(y0,y1) tile subpixel coordinate.
//
//   DY :  6 : signed subpixel delta y1-y0. The range of delta is
//             [-32,32] but horizontal lines are not encoded so [1,32]
//             is mapped to [0,31]. The resulting range [-32,31] fits
//             in 6 bits.
//
// TTS:
//
//  0                        31
//  |  TX |  SX  |  TY |  DY  |
//  +-----+------+-----+------+
//  |  10 |   6  |  10 |   6  |
//

//
// Field widths of the 32-bit TTS word (10 + 6 + 10 + 6 == 32).
//
// NOTE(review): the fourth field is called DY in the layout above and
// in SKC_TTS_OFFSET_DY, but its width macro is spelled SKC_TTS_BITS_SY.
// The spelling is kept because code outside this header may use it.
//

#define SKC_TTS_BITS_TX                 10
#define SKC_TTS_BITS_SX                 6
#define SKC_TTS_BITS_TY                 10
#define SKC_TTS_BITS_SY                 6

//
// Sentinel for "no TTS".  SKC_UINT_MAX is defined outside this header.
//

#define SKC_TTS_INVALID                 ( SKC_UINT_MAX ) // relies on limited range of dx

//
// Bit offsets of the TTS fields (TX starts at bit 0).
//

#define SKC_TTS_OFFSET_SX               (SKC_TTS_BITS_TX)                                      // 10
#define SKC_TTS_OFFSET_TY               (SKC_TTS_BITS_TX + SKC_TTS_BITS_SX)                    // 16
#define SKC_TTS_OFFSET_DY               (SKC_TTS_BITS_TX + SKC_TTS_BITS_SX + SKC_TTS_BITS_TY)  // 26

//
// NOTE(review): SKC_BITS_TO_MASK() / SKC_BITS_TO_MASK_AT() are not
// defined in this header; the uses below assume
// SKC_BITS_TO_MASK_AT(bits,offset) builds a `bits`-wide mask whose
// lowest set bit is at `offset` -- confirm against "macros.h".
//

#define SKC_TTS_MASK_TX                 SKC_BITS_TO_MASK(SKC_TTS_BITS_TX)
#define SKC_TTS_MASK_SX                 SKC_BITS_TO_MASK_AT(SKC_TTS_BITS_SX,SKC_TTS_OFFSET_SX)
#define SKC_TTS_MASK_TY                 SKC_BITS_TO_MASK_AT(SKC_TTS_BITS_TY,SKC_TTS_OFFSET_TY)

// The TX / TY bits that remain once the subpixel-resolution low bits
// (SKC_SUBPIXEL_RESL_{X,Y}_LOG2 of them) are excluded.
#define SKC_TTS_MASK_TX_PIXEL           SKC_BITS_TO_MASK_AT(SKC_TTS_BITS_TX-SKC_SUBPIXEL_RESL_X_LOG2, \
                                                            SKC_SUBPIXEL_RESL_X_LOG2)
#define SKC_TTS_MASK_TY_PIXEL           SKC_BITS_TO_MASK_AT(SKC_TTS_BITS_TY-SKC_SUBPIXEL_RESL_Y_LOG2, \
                                                            SKC_TTS_OFFSET_TY+SKC_SUBPIXEL_RESL_Y_LOG2)

//
// TTRK (64-BIT COMPARE)
//
//  0                                  63
//  | TTSB ID |   X  |   Y  | COHORT ID |
//  +---------+------+------+-----------+
//  |    27   |  12  |  12  |     13    |
//
//
// TTRK (32-BIT COMPARE)
//
//  0                                        63
//  | TTSB ID | N/A |   X  |   Y  | COHORT ID |
//  +---------+-----+------+------+-----------+
//  |    27   |  5  |  12  |  12  |     8     |
//

//
// TTRK is a sortable intermediate key format for TTSK
//
// We're going to use the 32-bit comparison version for now
//

//
// TTSK v2:
//
//  0                                  63
//  | TTSB ID | PREFIX |  N/A |  X |  Y |
//  +---------+--------+------+----+----+
//  |    27   | 1 (=0) |  12  | 12 | 12 |
//
//
// TTPK v2:
//
//  0                                    63
//  | TTPB ID | PREFIX | SPAN |  X  |  Y  |
//  +---------+--------+------+-----+-----+
//  |    27   | 1 (=1) |  12  | 12  | 12  |
//

//
// TTXK: field widths shared by the TTSK and TTPK layouts above.
// LO_* macros describe the low 32-bit word, HI_* the high 32-bit word.
//

#define SKC_TTXK_LO_BITS_ID             27
#define SKC_TTXK_LO_BITS_PREFIX         1
#define SKC_TTXK_HI_BITS_Y              12
#define SKC_TTXK_HI_BITS_X              12
#define SKC_TTXK_BITS_SPAN              12
#define SKC_TTXK_HI_BITS_YX             (SKC_TTXK_HI_BITS_Y + SKC_TTXK_HI_BITS_X)               // 24

//
// TTRK (32-bit compare): high word is X | Y | COHORT from bit 0 upward.
//

#define SKC_TTRK_HI_MASK_X              SKC_BITS_TO_MASK(SKC_TTXK_HI_BITS_X)
#define SKC_TTRK_HI_MASK_YX             SKC_BITS_TO_MASK(SKC_TTXK_HI_BITS_YX)

#define SKC_TTRK_HI_BITS_COHORT         8
#define SKC_TTRK_LO_BITS_NA             (32 - SKC_TTXK_LO_BITS_ID)                              // 5
#define SKC_TTRK_HI_BITS_COHORT_Y       (SKC_TTRK_HI_BITS_COHORT + SKC_TTXK_HI_BITS_Y)          // 20

#define SKC_TTRK_HI_OFFSET_COHORT       (32 - SKC_TTRK_HI_BITS_COHORT)                          // 24
#define SKC_TTRK_HI_MASK_COHORT         SKC_BITS_TO_MASK_AT(SKC_TTRK_HI_BITS_COHORT,SKC_TTRK_HI_OFFSET_COHORT)

#define SKC_TTRK_HI_BITS_COHORT_YX      (SKC_TTRK_HI_BITS_COHORT + SKC_TTXK_HI_BITS_Y + SKC_TTXK_HI_BITS_X) // 32

//
// TTXK low word: ID | PREFIX | low bits of SPAN.
//
// SKC_TTXK_LO_OFFSET_PREFIX is defined exactly once here; a repeated
// definition would have to be kept token-identical forever.
//

#define SKC_TTXK_LO_BITS_ID_PREFIX      (SKC_TTXK_LO_BITS_ID + SKC_TTXK_LO_BITS_PREFIX)         // 28

#define SKC_TTXK_LO_OFFSET_PREFIX       SKC_TTXK_LO_BITS_ID                                     // 27
#define SKC_TTXK_LO_OFFSET_SPAN         SKC_TTXK_LO_BITS_ID_PREFIX                              // 28

// The 12-bit SPAN straddles the two words: 4 bits low, 8 bits high.
#define SKC_TTXK_LO_BITS_SPAN           (32 - SKC_TTXK_LO_BITS_ID_PREFIX)                       // 4
#define SKC_TTXK_HI_BITS_SPAN           (SKC_TTXK_BITS_SPAN - SKC_TTXK_LO_BITS_SPAN)            // 8

#define SKC_TTXK_LO_MASK_ID             SKC_BITS_TO_MASK(SKC_TTXK_LO_BITS_ID)
#define SKC_TTXK_LO_MASK_PREFIX         SKC_BITS_TO_MASK_AT(SKC_TTXK_LO_BITS_PREFIX,SKC_TTXK_LO_OFFSET_PREFIX)
#define SKC_TTXK_LO_MASK_ID_PREFIX      SKC_BITS_TO_MASK(SKC_TTXK_LO_BITS_ID_PREFIX)

//
// TTXK high word: high bits of SPAN | X | Y, with Y in the top bits.
//

#define SKC_TTXK_HI_OFFSET_Y            (32 - SKC_TTXK_HI_BITS_Y)                               // 20
#define SKC_TTXK_HI_OFFSET_X            (SKC_TTXK_HI_OFFSET_Y - SKC_TTXK_HI_BITS_X)             // 8

// A value of 1 in the X field of the high word.
#define SKC_TTXK_HI_ONE_X               (1u << SKC_TTXK_HI_OFFSET_X)

#define SKC_TTXK_HI_MASK_YX             SKC_BITS_TO_MASK_AT(SKC_TTXK_HI_BITS_YX,SKC_TTXK_HI_OFFSET_X)
#define SKC_TTXK_HI_MASK_Y              SKC_BITS_TO_MASK_AT(SKC_TTXK_HI_BITS_Y ,SKC_TTXK_HI_OFFSET_Y)

// Shift amounts derived from the SPAN / YX field positions.
#define SKC_TTPK_LO_SHL_YX_SPAN         (SKC_TTXK_LO_OFFSET_SPAN - SKC_TTXK_HI_OFFSET_X)        // 20
#define SKC_TTPK_HI_SHR_YX_SPAN         (SKC_TTXK_HI_OFFSET_X + SKC_TTXK_LO_BITS_SPAN)          // 12

//
// TTCK (32-BIT COMPARE) v1 -- NOT USED:
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          30          |    1   |    1   |   18  |  7  |  7  |
//
//
// TTCK (32-BIT COMPARE) v2 -- NOT USED:
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          30          |    1   |    1   |   15  |  9  |  8  |
//
//
// TTCK (64-BIT COMPARE) -- achieves 4K x 4K with an 8x16 tile:
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          27          |    1   |    1   |   18  |  9  |  8  |
//

#define SKC_TTCK_BITS_LAYER             18

#define SKC_TTCK_LO_BITS_ID             SKC_TTXK_LO_BITS_ID                                     // 27
#define SKC_TTCK_LO_OFFSET_ID           0

#define SKC_TTCK_LO_MASK_ID             SKC_BITS_TO_MASK(SKC_TTCK_LO_BITS_ID)

#define SKC_TTCK_LO_BITS_PREFIX         1
#define SKC_TTCK_LO_OFFSET_PREFIX       SKC_TTCK_LO_BITS_ID                                     // 27
#define SKC_TTCK_LO_MASK_PREFIX         SKC_BITS_TO_MASK_AT(SKC_TTCK_LO_BITS_PREFIX,SKC_TTCK_LO_OFFSET_PREFIX)

#define SKC_TTCK_LO_BITS_ID_PREFIX      (SKC_TTCK_LO_BITS_ID + SKC_TTCK_LO_BITS_PREFIX)         // 28
419 #define SKC_TTCK_LO_MASK_ID_PREFIX SKC_BITS_TO_MASK(SKC_TTCK_LO_BITS_ID_PREFIX) 420 421 #define SKC_TTCK_LO_BITS_ESCAPE 1 422 #define SKC_TTCK_LO_OFFSET_ESCAPE SKC_TTCK_LO_BITS_ID_PREFIX 423 #define SKC_TTCK_LO_MASK_ESCAPE SKC_BITS_TO_MASK_AT(SKC_TTCK_LO_BITS_ESCAPE,SKC_TTCK_LO_OFFSET_ESCAPE) 424 425 #define SKC_TTCK_LO_BITS_ID_PREFIX_ESCAPE (SKC_TTCK_LO_BITS_ID_PREFIX + SKC_TTCK_LO_BITS_ESCAPE) 426 427 #define SKC_TTCK_HI_OFFSET_Y 24 428 #define SKC_TTCK_HI_OFFSET_X 15 429 430 #define SKC_TTCK_HI_BITS_Y 8 431 #define SKC_TTCK_HI_BITS_X 9 432 #define SKC_TTCK_HI_BITS_YX (SKC_TTCK_HI_BITS_X + SKC_TTCK_HI_BITS_Y) 433 #define SKC_TTCK_HI_MASK_YX SKC_BITS_TO_MASK_AT(SKC_TTCK_HI_BITS_YX,SKC_TTCK_HI_OFFSET_X) 434 435 #define SKC_TTCK_HI_BITS_LAYER (32 - SKC_TTCK_HI_BITS_YX) 436 #define SKC_TTCK_HI_MASK_LAYER SKC_BITS_TO_MASK(SKC_TTCK_HI_BITS_LAYER) 437 #define SKC_TTCK_HI_SHR_LAYER (SKC_TTCK_HI_BITS_Y + SKC_TTCK_HI_BITS_X + SKC_TTCK_BITS_LAYER - 32) 438 439 #define SKC_TTCK_LO_BITS_LAYER (SKC_TTCK_BITS_LAYER - SKC_TTCK_HI_BITS_LAYER) 440 441 // 442 // TILE COORD 443 // 444 // 0 32 445 // | N/A | X | Y | 446 // +-----+-----+-----+ 447 // | 8 | 12 | 12 | 448 // 449 // 450 // This simplifies the clip test in the place kernel. 451 // 452 453 union skc_tile_coord 454 { 455 skc_uint u32; 456 457 struct { 458 #if defined(__OPENCL_C_VERSION__) 459 skc_uint xy; 460 #else 461 skc_uint na0 : 32 - SKC_TTXK_HI_BITS_YX; // 8 462 skc_uint x : SKC_TTXK_HI_BITS_X; // 12 463 skc_uint y : SKC_TTXK_HI_BITS_Y; // 12 464 #endif 465 }; 466 }; 467 468 SKC_STATIC_ASSERT(sizeof(union skc_tile_coord) == sizeof(skc_uint)); 469 470 // 471 // 472 // 473 474 union skc_tile_clip 475 { 476 skc_uint u32a2[2]; 477 478 skc_uint2 u32v2; 479 480 struct { 481 union skc_tile_coord xy0; // lower left 482 union skc_tile_coord xy1; // upper right 483 }; 484 }; 485 486 SKC_STATIC_ASSERT(sizeof(union skc_tile_clip) == sizeof(skc_uint2)); 487 488 // 489 // 490 // 491 492 #endif 493 494 // 495 // 496 // 497