1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26
27 #include "../display_mode_lib.h"
28 #include "../dml_inline_defs.h"
29 #include "../display_mode_vba.h"
30 #include "display_mode_vba_21.h"
31
32
33 /*
34 * NOTE:
35 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 *
37 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
38 * ways. Unless there is something clearly wrong with it the code should
39 * remain as-is as it provides us with a guarantee from HW that it is correct.
40 */
41 typedef struct {
42 double DPPCLK;
43 double DISPCLK;
44 double PixelClock;
45 double DCFCLKDeepSleep;
46 unsigned int DPPPerPlane;
47 bool ScalerEnabled;
48 enum scan_direction_class SourceScan;
49 unsigned int BlockWidth256BytesY;
50 unsigned int BlockHeight256BytesY;
51 unsigned int BlockWidth256BytesC;
52 unsigned int BlockHeight256BytesC;
53 unsigned int InterlaceEnable;
54 unsigned int NumberOfCursors;
55 unsigned int VBlank;
56 unsigned int HTotal;
57 } Pipe;
58
/*
 * Host VM settings handed to CalculatePrefetchSchedule() through its
 * myHostVM argument.
 */
typedef struct {
	/* Together with GPUVMEnable, switches on the host-VM inefficiency factor. */
	bool Enable;
	/*
	 * The prefetch code derives its dynamic level count as
	 * MaxPageTableLevels - CachedPageTableLevels (unsigned subtraction).
	 */
	unsigned int MaxPageTableLevels;
	unsigned int CachedPageTableLevels;
} HostVM;
64
/*
 * Sentinel bits-per-pixel values.  Their users are outside the visible part
 * of this file; the names suggest "no valid bpp" and "pipe is blended into
 * another one" -- TODO confirm against the call sites.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
/*
 * Image-width limits.  The same figures (5184, and 4096 for 4:2:0) appear in
 * the slice-width note inside dscceComputeDelay().
 */
#define DCN21_MAX_DSC_IMAGE_WIDTH 5184
#define DCN21_MAX_420_IMAGE_WIDTH 4096
69
70 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
71 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
72 struct display_mode_lib *mode_lib);
73 static unsigned int dscceComputeDelay(
74 unsigned int bpc,
75 double bpp,
76 unsigned int sliceWidth,
77 unsigned int numSlices,
78 enum output_format_class pixelFormat);
79 static unsigned int dscComputeDelay(enum output_format_class pixelFormat);
80 // Super monster function with some 45 argument
81 static bool CalculatePrefetchSchedule(
82 struct display_mode_lib *mode_lib,
83 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
84 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
85 Pipe *myPipe,
86 unsigned int DSCDelay,
87 double DPPCLKDelaySubtotal,
88 double DPPCLKDelaySCL,
89 double DPPCLKDelaySCLLBOnly,
90 double DPPCLKDelayCNVCFormater,
91 double DPPCLKDelayCNVCCursor,
92 double DISPCLKDelaySubtotal,
93 unsigned int ScalerRecoutWidth,
94 enum output_format_class OutputFormat,
95 unsigned int MaxInterDCNTileRepeaters,
96 unsigned int VStartup,
97 unsigned int MaxVStartup,
98 unsigned int GPUVMPageTableLevels,
99 bool GPUVMEnable,
100 HostVM *myHostVM,
101 bool DynamicMetadataEnable,
102 int DynamicMetadataLinesBeforeActiveRequired,
103 unsigned int DynamicMetadataTransmittedBytes,
104 bool DCCEnable,
105 double UrgentLatency,
106 double UrgentExtraLatency,
107 double TCalc,
108 unsigned int PDEAndMetaPTEBytesFrame,
109 unsigned int MetaRowByte,
110 unsigned int PixelPTEBytesPerRow,
111 double PrefetchSourceLinesY,
112 unsigned int SwathWidthY,
113 double BytePerPixelDETY,
114 double VInitPreFillY,
115 unsigned int MaxNumSwathY,
116 double PrefetchSourceLinesC,
117 double BytePerPixelDETC,
118 double VInitPreFillC,
119 unsigned int MaxNumSwathC,
120 unsigned int SwathHeightY,
121 unsigned int SwathHeightC,
122 double TWait,
123 bool XFCEnabled,
124 double XFCRemoteSurfaceFlipDelay,
125 bool ProgressiveToInterlaceUnitInOPP,
126 double *DSTXAfterScaler,
127 double *DSTYAfterScaler,
128 double *DestinationLinesForPrefetch,
129 double *PrefetchBandwidth,
130 double *DestinationLinesToRequestVMInVBlank,
131 double *DestinationLinesToRequestRowInVBlank,
132 double *VRatioPrefetchY,
133 double *VRatioPrefetchC,
134 double *RequiredPrefetchPixDataBWLuma,
135 double *RequiredPrefetchPixDataBWChroma,
136 unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
137 double *Tno_bw,
138 double *prefetch_vmrow_bw,
139 unsigned int *swath_width_luma_ub,
140 unsigned int *swath_width_chroma_ub,
141 unsigned int *VUpdateOffsetPix,
142 double *VUpdateWidthPix,
143 double *VReadyOffsetPix);
144 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
145 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
146 static double CalculateDCCConfiguration(
147 bool DCCEnabled,
148 bool DCCProgrammingAssumesScanDirectionUnknown,
149 unsigned int ViewportWidth,
150 unsigned int ViewportHeight,
151 double DETBufferSize,
152 unsigned int RequestHeight256Byte,
153 unsigned int SwathHeight,
154 enum dm_swizzle_mode TilingFormat,
155 unsigned int BytePerPixel,
156 enum scan_direction_class ScanOrientation,
157 unsigned int *MaxUncompressedBlock,
158 unsigned int *MaxCompressedBlock,
159 unsigned int *Independent64ByteBlock);
160 static double CalculatePrefetchSourceLines(
161 struct display_mode_lib *mode_lib,
162 double VRatio,
163 double vtaps,
164 bool Interlace,
165 bool ProgressiveToInterlaceUnitInOPP,
166 unsigned int SwathHeight,
167 unsigned int ViewportYStart,
168 double *VInitPreFill,
169 unsigned int *MaxNumSwath);
170 static unsigned int CalculateVMAndRowBytes(
171 struct display_mode_lib *mode_lib,
172 bool DCCEnable,
173 unsigned int BlockHeight256Bytes,
174 unsigned int BlockWidth256Bytes,
175 enum source_format_class SourcePixelFormat,
176 unsigned int SurfaceTiling,
177 unsigned int BytePerPixel,
178 enum scan_direction_class ScanDirection,
179 unsigned int ViewportWidth,
180 unsigned int ViewportHeight,
181 unsigned int SwathWidthY,
182 bool GPUVMEnable,
183 bool HostVMEnable,
184 unsigned int HostVMMaxPageTableLevels,
185 unsigned int HostVMCachedPageTableLevels,
186 unsigned int VMMPageSize,
187 unsigned int PTEBufferSizeInRequests,
188 unsigned int Pitch,
189 unsigned int DCCMetaPitch,
190 unsigned int *MacroTileWidth,
191 unsigned int *MetaRowByte,
192 unsigned int *PixelPTEBytesPerRow,
193 bool *PTEBufferSizeNotExceeded,
194 unsigned int *dpte_row_width_ub,
195 unsigned int *dpte_row_height,
196 unsigned int *MetaRequestWidth,
197 unsigned int *MetaRequestHeight,
198 unsigned int *meta_row_width,
199 unsigned int *meta_row_height,
200 unsigned int *vm_group_bytes,
201 unsigned int *dpte_group_bytes,
202 unsigned int *PixelPTEReqWidth,
203 unsigned int *PixelPTEReqHeight,
204 unsigned int *PTERequestSize,
205 unsigned int *DPDE0BytesFrame,
206 unsigned int *MetaPTEBytesFrame);
207
208 static double CalculateTWait(
209 unsigned int PrefetchMode,
210 double DRAMClockChangeLatency,
211 double UrgentLatency,
212 double SREnterPlusExitTime);
213 static double CalculateRemoteSurfaceFlipDelay(
214 struct display_mode_lib *mode_lib,
215 double VRatio,
216 double SwathWidth,
217 double Bpp,
218 double LineTime,
219 double XFCTSlvVupdateOffset,
220 double XFCTSlvVupdateWidth,
221 double XFCTSlvVreadyOffset,
222 double XFCXBUFLatencyTolerance,
223 double XFCFillBWOverhead,
224 double XFCSlvChunkSize,
225 double XFCBusTransportTime,
226 double TCalc,
227 double TWait,
228 double *SrcActiveDrainRate,
229 double *TInitXFill,
230 double *TslvChk);
231 static void CalculateActiveRowBandwidth(
232 bool GPUVMEnable,
233 enum source_format_class SourcePixelFormat,
234 double VRatio,
235 bool DCCEnable,
236 double LineTime,
237 unsigned int MetaRowByteLuma,
238 unsigned int MetaRowByteChroma,
239 unsigned int meta_row_height_luma,
240 unsigned int meta_row_height_chroma,
241 unsigned int PixelPTEBytesPerRowLuma,
242 unsigned int PixelPTEBytesPerRowChroma,
243 unsigned int dpte_row_height_luma,
244 unsigned int dpte_row_height_chroma,
245 double *meta_row_bw,
246 double *dpte_row_bw);
247 static void CalculateFlipSchedule(
248 struct display_mode_lib *mode_lib,
249 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
250 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
251 double UrgentExtraLatency,
252 double UrgentLatency,
253 unsigned int GPUVMMaxPageTableLevels,
254 bool HostVMEnable,
255 unsigned int HostVMMaxPageTableLevels,
256 unsigned int HostVMCachedPageTableLevels,
257 bool GPUVMEnable,
258 double PDEAndMetaPTEBytesPerFrame,
259 double MetaRowBytes,
260 double DPTEBytesPerRow,
261 double BandwidthAvailableForImmediateFlip,
262 unsigned int TotImmediateFlipBytes,
263 enum source_format_class SourcePixelFormat,
264 double LineTime,
265 double VRatio,
266 double Tno_bw,
267 bool DCCEnable,
268 unsigned int dpte_row_height,
269 unsigned int meta_row_height,
270 unsigned int dpte_row_height_chroma,
271 unsigned int meta_row_height_chroma,
272 double *DestinationLinesToRequestVMInImmediateFlip,
273 double *DestinationLinesToRequestRowInImmediateFlip,
274 double *final_flip_bw,
275 bool *ImmediateFlipSupportedForPipe);
276 static double CalculateWriteBackDelay(
277 enum source_format_class WritebackPixelFormat,
278 double WritebackHRatio,
279 double WritebackVRatio,
280 unsigned int WritebackLumaHTaps,
281 unsigned int WritebackLumaVTaps,
282 unsigned int WritebackChromaHTaps,
283 unsigned int WritebackChromaVTaps,
284 unsigned int WritebackDestinationWidth);
285 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
286 struct display_mode_lib *mode_lib,
287 unsigned int PrefetchMode,
288 unsigned int NumberOfActivePlanes,
289 unsigned int MaxLineBufferLines,
290 unsigned int LineBufferSize,
291 unsigned int DPPOutputBufferPixels,
292 double DETBufferSizeInKByte,
293 unsigned int WritebackInterfaceLumaBufferSize,
294 unsigned int WritebackInterfaceChromaBufferSize,
295 double DCFCLK,
296 double UrgentOutOfOrderReturn,
297 double ReturnBW,
298 bool GPUVMEnable,
299 int dpte_group_bytes[],
300 unsigned int MetaChunkSize,
301 double UrgentLatency,
302 double ExtraLatency,
303 double WritebackLatency,
304 double WritebackChunkSize,
305 double SOCCLK,
306 double DRAMClockChangeLatency,
307 double SRExitTime,
308 double SREnterPlusExitTime,
309 double DCFCLKDeepSleep,
310 int DPPPerPlane[],
311 bool DCCEnable[],
312 double DPPCLK[],
313 double SwathWidthSingleDPPY[],
314 unsigned int SwathHeightY[],
315 double ReadBandwidthPlaneLuma[],
316 unsigned int SwathHeightC[],
317 double ReadBandwidthPlaneChroma[],
318 unsigned int LBBitPerPixel[],
319 double SwathWidthY[],
320 double HRatio[],
321 unsigned int vtaps[],
322 unsigned int VTAPsChroma[],
323 double VRatio[],
324 unsigned int HTotal[],
325 double PixelClock[],
326 unsigned int BlendingAndTiming[],
327 double BytePerPixelDETY[],
328 double BytePerPixelDETC[],
329 bool WritebackEnable[],
330 enum source_format_class WritebackPixelFormat[],
331 double WritebackDestinationWidth[],
332 double WritebackDestinationHeight[],
333 double WritebackSourceHeight[],
334 enum clock_change_support *DRAMClockChangeSupport,
335 double *UrgentWatermark,
336 double *WritebackUrgentWatermark,
337 double *DRAMClockChangeWatermark,
338 double *WritebackDRAMClockChangeWatermark,
339 double *StutterExitWatermark,
340 double *StutterEnterPlusExitWatermark,
341 double *MinActiveDRAMClockChangeLatencySupported);
342 static void CalculateDCFCLKDeepSleep(
343 struct display_mode_lib *mode_lib,
344 unsigned int NumberOfActivePlanes,
345 double BytePerPixelDETY[],
346 double BytePerPixelDETC[],
347 double VRatio[],
348 double SwathWidthY[],
349 int DPPPerPlane[],
350 double HRatio[],
351 double PixelClock[],
352 double PSCL_THROUGHPUT[],
353 double PSCL_THROUGHPUT_CHROMA[],
354 double DPPCLK[],
355 double *DCFCLKDeepSleep);
356 static void CalculateDETBufferSize(
357 double DETBufferSizeInKByte,
358 unsigned int SwathHeightY,
359 unsigned int SwathHeightC,
360 double *DETBufferSizeY,
361 double *DETBufferSizeC);
362 static void CalculateUrgentBurstFactor(
363 unsigned int DETBufferSizeInKByte,
364 unsigned int SwathHeightY,
365 unsigned int SwathHeightC,
366 unsigned int SwathWidthY,
367 double LineTime,
368 double UrgentLatency,
369 double CursorBufferSize,
370 unsigned int CursorWidth,
371 unsigned int CursorBPP,
372 double VRatio,
373 double VRatioPreY,
374 double VRatioPreC,
375 double BytePerPixelInDETY,
376 double BytePerPixelInDETC,
377 double *UrgentBurstFactorCursor,
378 double *UrgentBurstFactorCursorPre,
379 double *UrgentBurstFactorLuma,
380 double *UrgentBurstFactorLumaPre,
381 double *UrgentBurstFactorChroma,
382 double *UrgentBurstFactorChromaPre,
383 unsigned int *NotEnoughUrgentLatencyHiding,
384 unsigned int *NotEnoughUrgentLatencyHidingPre);
385
386 static void CalculatePixelDeliveryTimes(
387 unsigned int NumberOfActivePlanes,
388 double VRatio[],
389 double VRatioPrefetchY[],
390 double VRatioPrefetchC[],
391 unsigned int swath_width_luma_ub[],
392 unsigned int swath_width_chroma_ub[],
393 int DPPPerPlane[],
394 double HRatio[],
395 double PixelClock[],
396 double PSCL_THROUGHPUT[],
397 double PSCL_THROUGHPUT_CHROMA[],
398 double DPPCLK[],
399 double BytePerPixelDETC[],
400 enum scan_direction_class SourceScan[],
401 unsigned int BlockWidth256BytesY[],
402 unsigned int BlockHeight256BytesY[],
403 unsigned int BlockWidth256BytesC[],
404 unsigned int BlockHeight256BytesC[],
405 double DisplayPipeLineDeliveryTimeLuma[],
406 double DisplayPipeLineDeliveryTimeChroma[],
407 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
408 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
409 double DisplayPipeRequestDeliveryTimeLuma[],
410 double DisplayPipeRequestDeliveryTimeChroma[],
411 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
412 double DisplayPipeRequestDeliveryTimeChromaPrefetch[]);
413
414 static void CalculateMetaAndPTETimes(
415 unsigned int NumberOfActivePlanes,
416 bool GPUVMEnable,
417 unsigned int MetaChunkSize,
418 unsigned int MinMetaChunkSizeBytes,
419 unsigned int GPUVMMaxPageTableLevels,
420 unsigned int HTotal[],
421 double VRatio[],
422 double VRatioPrefetchY[],
423 double VRatioPrefetchC[],
424 double DestinationLinesToRequestRowInVBlank[],
425 double DestinationLinesToRequestRowInImmediateFlip[],
426 double DestinationLinesToRequestVMInVBlank[],
427 double DestinationLinesToRequestVMInImmediateFlip[],
428 bool DCCEnable[],
429 double PixelClock[],
430 double BytePerPixelDETY[],
431 double BytePerPixelDETC[],
432 enum scan_direction_class SourceScan[],
433 unsigned int dpte_row_height[],
434 unsigned int dpte_row_height_chroma[],
435 unsigned int meta_row_width[],
436 unsigned int meta_row_height[],
437 unsigned int meta_req_width[],
438 unsigned int meta_req_height[],
439 int dpte_group_bytes[],
440 unsigned int PTERequestSizeY[],
441 unsigned int PTERequestSizeC[],
442 unsigned int PixelPTEReqWidthY[],
443 unsigned int PixelPTEReqHeightY[],
444 unsigned int PixelPTEReqWidthC[],
445 unsigned int PixelPTEReqHeightC[],
446 unsigned int dpte_row_width_luma_ub[],
447 unsigned int dpte_row_width_chroma_ub[],
448 unsigned int vm_group_bytes[],
449 unsigned int dpde0_bytes_per_frame_ub_l[],
450 unsigned int dpde0_bytes_per_frame_ub_c[],
451 unsigned int meta_pte_bytes_per_frame_ub_l[],
452 unsigned int meta_pte_bytes_per_frame_ub_c[],
453 double DST_Y_PER_PTE_ROW_NOM_L[],
454 double DST_Y_PER_PTE_ROW_NOM_C[],
455 double DST_Y_PER_META_ROW_NOM_L[],
456 double TimePerMetaChunkNominal[],
457 double TimePerMetaChunkVBlank[],
458 double TimePerMetaChunkFlip[],
459 double time_per_pte_group_nom_luma[],
460 double time_per_pte_group_vblank_luma[],
461 double time_per_pte_group_flip_luma[],
462 double time_per_pte_group_nom_chroma[],
463 double time_per_pte_group_vblank_chroma[],
464 double time_per_pte_group_flip_chroma[],
465 double TimePerVMGroupVBlank[],
466 double TimePerVMGroupFlip[],
467 double TimePerVMRequestVBlank[],
468 double TimePerVMRequestFlip[]);
469
470 static double CalculateExtraLatency(
471 double UrgentRoundTripAndOutOfOrderLatency,
472 int TotalNumberOfActiveDPP,
473 int PixelChunkSizeInKByte,
474 int TotalNumberOfDCCActiveDPP,
475 int MetaChunkSize,
476 double ReturnBW,
477 bool GPUVMEnable,
478 bool HostVMEnable,
479 int NumberOfActivePlanes,
480 int NumberOfDPP[],
481 int dpte_group_bytes[],
482 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
483 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
484 int HostVMMaxPageTableLevels,
485 int HostVMCachedPageTableLevels);
486
/*
 * dml21_recalculate - run the display-mode calculation stages in order.
 * @mode_lib: library instance handed unchanged to every stage
 *
 * Invokes, in this fixed sequence:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Nothing is returned; the stages presumably leave their results in
 * @mode_lib (confirm against their definitions).  Keep the order as-is:
 * whether later stages consume what earlier ones produce cannot be seen from
 * this function, so reordering is not known to be safe.
 */
void dml21_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
494
dscceComputeDelay(unsigned int bpc,double bpp,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat)495 static unsigned int dscceComputeDelay(
496 unsigned int bpc,
497 double bpp,
498 unsigned int sliceWidth,
499 unsigned int numSlices,
500 enum output_format_class pixelFormat)
501 {
502 // valid bpc = source bits per component in the set of {8, 10, 12}
503 // valid bpp = increments of 1/16 of a bit
504 // min = 6/7/8 in N420/N422/444, respectively
505 // max = such that compression is 1:1
506 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
507 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
508 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
509
510 // fixed value
511 unsigned int rcModelSize = 8192;
512
513 // N422/N420 operate at 2 pixels per clock
514 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, S, ix, wx, p, l0, a, ax, l,
515 Delay, pixels;
516
517 if (pixelFormat == dm_n422 || pixelFormat == dm_420)
518 pixelsPerClock = 2;
519 // #all other modes operate at 1 pixel per clock
520 else
521 pixelsPerClock = 1;
522
523 //initial transmit delay as per PPS
524 initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock);
525
526 //compute ssm delay
527 if (bpc == 8)
528 D = 81;
529 else if (bpc == 10)
530 D = 89;
531 else
532 D = 113;
533
534 //divide by pixel per cycle to compute slice width as seen by DSC
535 w = sliceWidth / pixelsPerClock;
536
537 //422 mode has an additional cycle of delay
538 if (pixelFormat == dm_s422)
539 S = 1;
540 else
541 S = 0;
542
543 //main calculation for the dscce
544 ix = initalXmitDelay + 45;
545 wx = (w + 2) / 3;
546 p = 3 * wx - w;
547 l0 = ix / w;
548 a = ix + p * l0;
549 ax = (a + 2) / 3 + D + 6 + 1;
550 l = (ax + wx - 1) / wx;
551 if ((ix % w) == 0 && p != 0)
552 lstall = 1;
553 else
554 lstall = 0;
555 Delay = l * wx * (numSlices - 1) + ax + S + lstall + 22;
556
557 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
558 pixels = Delay * 3 * pixelsPerClock;
559 return pixels;
560 }
561
dscComputeDelay(enum output_format_class pixelFormat)562 static unsigned int dscComputeDelay(enum output_format_class pixelFormat)
563 {
564 unsigned int Delay = 0;
565
566 if (pixelFormat == dm_420) {
567 // sfr
568 Delay = Delay + 2;
569 // dsccif
570 Delay = Delay + 0;
571 // dscc - input deserializer
572 Delay = Delay + 3;
573 // dscc gets pixels every other cycle
574 Delay = Delay + 2;
575 // dscc - input cdc fifo
576 Delay = Delay + 12;
577 // dscc gets pixels every other cycle
578 Delay = Delay + 13;
579 // dscc - cdc uncertainty
580 Delay = Delay + 2;
581 // dscc - output cdc fifo
582 Delay = Delay + 7;
583 // dscc gets pixels every other cycle
584 Delay = Delay + 3;
585 // dscc - cdc uncertainty
586 Delay = Delay + 2;
587 // dscc - output serializer
588 Delay = Delay + 1;
589 // sft
590 Delay = Delay + 1;
591 } else if (pixelFormat == dm_n422) {
592 // sfr
593 Delay = Delay + 2;
594 // dsccif
595 Delay = Delay + 1;
596 // dscc - input deserializer
597 Delay = Delay + 5;
598 // dscc - input cdc fifo
599 Delay = Delay + 25;
600 // dscc - cdc uncertainty
601 Delay = Delay + 2;
602 // dscc - output cdc fifo
603 Delay = Delay + 10;
604 // dscc - cdc uncertainty
605 Delay = Delay + 2;
606 // dscc - output serializer
607 Delay = Delay + 1;
608 // sft
609 Delay = Delay + 1;
610 } else {
611 // sfr
612 Delay = Delay + 2;
613 // dsccif
614 Delay = Delay + 0;
615 // dscc - input deserializer
616 Delay = Delay + 3;
617 // dscc - input cdc fifo
618 Delay = Delay + 12;
619 // dscc - cdc uncertainty
620 Delay = Delay + 2;
621 // dscc - output cdc fifo
622 Delay = Delay + 7;
623 // dscc - output serializer
624 Delay = Delay + 1;
625 // dscc - cdc uncertainty
626 Delay = Delay + 2;
627 // sft
628 Delay = Delay + 1;
629 }
630
631 return Delay;
632 }
633
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotal,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCFormater,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int ScalerRecoutWidth,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,HostVM * myHostVM,bool DynamicMetadataEnable,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,bool DCCEnable,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double BytePerPixelDETY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,double BytePerPixelDETC,double VInitPreFillC,unsigned int MaxNumSwathC,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,bool XFCEnabled,double XFCRemoteSurfaceFlipDelay,bool ProgressiveToInterlaceUnitInOPP,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,unsigned int * VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,unsigned int * swath_width_luma_ub,unsigned int * swath_width_chroma_ub,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)634 static bool CalculatePrefetchSchedule(
635 struct display_mode_lib *mode_lib,
636 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
637 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
638 Pipe *myPipe,
639 unsigned int DSCDelay,
640 double DPPCLKDelaySubtotal,
641 double DPPCLKDelaySCL,
642 double DPPCLKDelaySCLLBOnly,
643 double DPPCLKDelayCNVCFormater,
644 double DPPCLKDelayCNVCCursor,
645 double DISPCLKDelaySubtotal,
646 unsigned int ScalerRecoutWidth,
647 enum output_format_class OutputFormat,
648 unsigned int MaxInterDCNTileRepeaters,
649 unsigned int VStartup,
650 unsigned int MaxVStartup,
651 unsigned int GPUVMPageTableLevels,
652 bool GPUVMEnable,
653 HostVM *myHostVM,
654 bool DynamicMetadataEnable,
655 int DynamicMetadataLinesBeforeActiveRequired,
656 unsigned int DynamicMetadataTransmittedBytes,
657 bool DCCEnable,
658 double UrgentLatency,
659 double UrgentExtraLatency,
660 double TCalc,
661 unsigned int PDEAndMetaPTEBytesFrame,
662 unsigned int MetaRowByte,
663 unsigned int PixelPTEBytesPerRow,
664 double PrefetchSourceLinesY,
665 unsigned int SwathWidthY,
666 double BytePerPixelDETY,
667 double VInitPreFillY,
668 unsigned int MaxNumSwathY,
669 double PrefetchSourceLinesC,
670 double BytePerPixelDETC,
671 double VInitPreFillC,
672 unsigned int MaxNumSwathC,
673 unsigned int SwathHeightY,
674 unsigned int SwathHeightC,
675 double TWait,
676 bool XFCEnabled,
677 double XFCRemoteSurfaceFlipDelay,
678 bool ProgressiveToInterlaceUnitInOPP,
679 double *DSTXAfterScaler,
680 double *DSTYAfterScaler,
681 double *DestinationLinesForPrefetch,
682 double *PrefetchBandwidth,
683 double *DestinationLinesToRequestVMInVBlank,
684 double *DestinationLinesToRequestRowInVBlank,
685 double *VRatioPrefetchY,
686 double *VRatioPrefetchC,
687 double *RequiredPrefetchPixDataBWLuma,
688 double *RequiredPrefetchPixDataBWChroma,
689 unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
690 double *Tno_bw,
691 double *prefetch_vmrow_bw,
692 unsigned int *swath_width_luma_ub,
693 unsigned int *swath_width_chroma_ub,
694 unsigned int *VUpdateOffsetPix,
695 double *VUpdateWidthPix,
696 double *VReadyOffsetPix)
697 {
698 bool MyError = false;
699 unsigned int DPPCycles, DISPCLKCycles;
700 double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime;
701 double Tdm, LineTime, Tsetup;
702 double dst_y_prefetch_equ;
703 double Tsw_oto;
704 double prefetch_bw_oto;
705 double Tvm_oto;
706 double Tr0_oto;
707 double Tvm_oto_lines;
708 double Tr0_oto_lines;
709 double Tsw_oto_lines;
710 double dst_y_prefetch_oto;
711 double TimeForFetchingMetaPTE = 0;
712 double TimeForFetchingRowInVBlank = 0;
713 double LinesToRequestPrefetchPixelData = 0;
714 double HostVMInefficiencyFactor;
715 unsigned int HostVMDynamicLevels;
716
717 if (GPUVMEnable == true && myHostVM->Enable == true) {
718 HostVMInefficiencyFactor =
719 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
720 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
721 HostVMDynamicLevels = myHostVM->MaxPageTableLevels
722 - myHostVM->CachedPageTableLevels;
723 } else {
724 HostVMInefficiencyFactor = 1;
725 HostVMDynamicLevels = 0;
726 }
727
728 if (myPipe->ScalerEnabled)
729 DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL;
730 else
731 DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly;
732
733 DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
734
735 DISPCLKCycles = DISPCLKDelaySubtotal;
736
737 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
738 return true;
739
740 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK
741 + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
742
743 if (myPipe->DPPPerPlane > 1)
744 *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth;
745
746 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
747 *DSTYAfterScaler = 1;
748 else
749 *DSTYAfterScaler = 0;
750
751 DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * myPipe->HTotal)) + *DSTXAfterScaler;
752 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
753 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
754
755 *VUpdateOffsetPix = dml_ceil(myPipe->HTotal / 4.0, 1);
756 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / myPipe->DPPCLK + 3.0 / myPipe->DISPCLK);
757 *VUpdateWidthPix = (14.0 / myPipe->DCFCLKDeepSleep + 12.0 / myPipe->DPPCLK + TotalRepeaterDelayTime)
758 * myPipe->PixelClock;
759
760 *VReadyOffsetPix = dml_max(
761 150.0 / myPipe->DPPCLK,
762 TotalRepeaterDelayTime + 20.0 / myPipe->DCFCLKDeepSleep + 10.0 / myPipe->DPPCLK)
763 * myPipe->PixelClock;
764
765 Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / myPipe->PixelClock;
766
767 LineTime = (double) myPipe->HTotal / myPipe->PixelClock;
768
769 if (DynamicMetadataEnable) {
770 double Tdmbf, Tdmec, Tdmsks;
771
772 Tdm = dml_max(0.0, UrgentExtraLatency - TCalc);
773 Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / myPipe->DISPCLK;
774 Tdmec = LineTime;
775 if (DynamicMetadataLinesBeforeActiveRequired == -1)
776 Tdmsks = myPipe->VBlank * LineTime / 2.0;
777 else
778 Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime;
779 if (myPipe->InterlaceEnable && !ProgressiveToInterlaceUnitInOPP)
780 Tdmsks = Tdmsks / 2;
781 if (VStartup * LineTime
782 < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) {
783 MyError = true;
784 *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait
785 + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime;
786 } else
787 *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0;
788 } else
789 Tdm = 0;
790
791 if (GPUVMEnable) {
792 if (GPUVMPageTableLevels >= 3)
793 *Tno_bw = UrgentExtraLatency + UrgentLatency * ((GPUVMPageTableLevels - 2) * (myHostVM->MaxPageTableLevels + 1) - 1);
794 else
795 *Tno_bw = 0;
796 } else if (!DCCEnable)
797 *Tno_bw = LineTime;
798 else
799 *Tno_bw = LineTime / 4;
800
801 dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime
802 - (Tsetup + Tdm) / LineTime
803 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
804
805 Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
806
807 if (myPipe->SourceScan == dm_horz) {
808 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;
809 *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
810 } else {
811 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;
812 *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
813 }
814
815 prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;
816
817
818 if (GPUVMEnable == true) {
819 Tvm_oto = dml_max(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
820 dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1),
821 LineTime / 4.0));
822 } else
823 Tvm_oto = LineTime / 4.0;
824
825 if ((GPUVMEnable == true || DCCEnable == true)) {
826 Tr0_oto = dml_max(
827 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
828 dml_max(UrgentLatency * (HostVMDynamicLevels + 1), dml_max(LineTime - Tvm_oto, LineTime / 4)));
829 } else
830 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
831
832 Tvm_oto_lines = dml_ceil(4 * Tvm_oto / LineTime, 1) / 4.0;
833 Tr0_oto_lines = dml_ceil(4 * Tr0_oto / LineTime, 1) / 4.0;
834 Tsw_oto_lines = dml_ceil(4 * Tsw_oto / LineTime, 1) / 4.0;
835 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Tsw_oto_lines + 0.75;
836
837 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
838
839 if (dst_y_prefetch_oto < dst_y_prefetch_equ)
840 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
841 else
842 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
843
844 dml_print("DML: VStartup: %d\n", VStartup);
845 dml_print("DML: TCalc: %f\n", TCalc);
846 dml_print("DML: TWait: %f\n", TWait);
847 dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay);
848 dml_print("DML: LineTime: %f\n", LineTime);
849 dml_print("DML: Tsetup: %f\n", Tsetup);
850 dml_print("DML: Tdm: %f\n", Tdm);
851 dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler);
852 dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler);
853 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
854
855 *PrefetchBandwidth = 0;
856 *DestinationLinesToRequestVMInVBlank = 0;
857 *DestinationLinesToRequestRowInVBlank = 0;
858 *VRatioPrefetchY = 0;
859 *VRatioPrefetchC = 0;
860 *RequiredPrefetchPixDataBWLuma = 0;
861 if (*DestinationLinesForPrefetch > 1) {
862 double PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
863 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
864 + PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1)
865 + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2))
866 / (*DestinationLinesForPrefetch * LineTime - *Tno_bw);
867
868 double PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
869 HostVMInefficiencyFactor + PrefetchSourceLinesY *
870 *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
871 PrefetchSourceLinesC * *swath_width_chroma_ub *
872 dml_ceil(BytePerPixelDETC, 2)) /
873 (*DestinationLinesForPrefetch * LineTime - *Tno_bw - 2 *
874 UrgentLatency * (1 + HostVMDynamicLevels));
875
876 double PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow
877 * HostVMInefficiencyFactor + PrefetchSourceLinesY *
878 *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
879 PrefetchSourceLinesC * *swath_width_chroma_ub *
880 dml_ceil(BytePerPixelDETC, 2)) /
881 (*DestinationLinesForPrefetch * LineTime -
882 UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
883 * (HostVMDynamicLevels + 1) - 1));
884
885 double PrefetchBandwidth4 = (PrefetchSourceLinesY * *swath_width_luma_ub *
886 dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC *
887 *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) /
888 (*DestinationLinesForPrefetch * LineTime -
889 UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
890 * (HostVMDynamicLevels + 1) - 1) - 2 * UrgentLatency *
891 (1 + HostVMDynamicLevels));
892
893 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw > 0) {
894 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / ((*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw);
895 }
896 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
897 *PrefetchBandwidth = PrefetchBandwidth1;
898 } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < UrgentLatency * (1 + HostVMDynamicLevels)) {
899 *PrefetchBandwidth = PrefetchBandwidth2;
900 } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
901 *PrefetchBandwidth = PrefetchBandwidth3;
902 } else {
903 *PrefetchBandwidth = PrefetchBandwidth4;
904 }
905
906 if (GPUVMEnable) {
907 TimeForFetchingMetaPTE = dml_max(*Tno_bw + (double) PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / *PrefetchBandwidth,
908 dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), LineTime / 4));
909 } else {
910 // 5/30/2018 - This was an optimization requested from Sy but now NumberOfCursors is no longer a factor
911 // so if this needs to be reinstated, then it should be officially done in the VBA code as well.
912 // if (mode_lib->NumberOfCursors > 0 || XFCEnabled)
913 TimeForFetchingMetaPTE = LineTime / 4;
914 // else
915 // TimeForFetchingMetaPTE = 0.0;
916 }
917
918 if ((GPUVMEnable == true || DCCEnable == true)) {
919 TimeForFetchingRowInVBlank =
920 dml_max(
921 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
922 / *PrefetchBandwidth,
923 dml_max(
924 UrgentLatency * (1 + HostVMDynamicLevels),
925 dml_max(
926 (LineTime
927 - TimeForFetchingMetaPTE) / 2.0,
928 LineTime
929 / 4.0)));
930 } else {
931 // See note above dated 5/30/2018
932 // if (NumberOfCursors > 0 || XFCEnabled)
933 TimeForFetchingRowInVBlank = (LineTime - TimeForFetchingMetaPTE) / 2.0;
934 // else // TODO: Did someone else add this??
935 // TimeForFetchingRowInVBlank = 0.0;
936 }
937
938 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
939
940 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
941
942 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
943 // See note above dated 5/30/2018
944 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
945 - ((GPUVMEnable || DCCEnable) ?
946 (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) :
947 0.0); // TODO: Did someone else add this??
948
949 if (LinesToRequestPrefetchPixelData > 0) {
950
951 *VRatioPrefetchY = (double) PrefetchSourceLinesY
952 / LinesToRequestPrefetchPixelData;
953 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
954 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
955 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
956 *VRatioPrefetchY =
957 dml_max(
958 (double) PrefetchSourceLinesY
959 / LinesToRequestPrefetchPixelData,
960 (double) MaxNumSwathY
961 * SwathHeightY
962 / (LinesToRequestPrefetchPixelData
963 - (VInitPreFillY
964 - 3.0)
965 / 2.0));
966 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
967 } else {
968 MyError = true;
969 *VRatioPrefetchY = 0;
970 }
971 }
972
973 *VRatioPrefetchC = (double) PrefetchSourceLinesC
974 / LinesToRequestPrefetchPixelData;
975 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
976
977 if ((SwathHeightC > 4)) {
978 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
979 *VRatioPrefetchC =
980 dml_max(
981 *VRatioPrefetchC,
982 (double) MaxNumSwathC
983 * SwathHeightC
984 / (LinesToRequestPrefetchPixelData
985 - (VInitPreFillC
986 - 3.0)
987 / 2.0));
988 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
989 } else {
990 MyError = true;
991 *VRatioPrefetchC = 0;
992 }
993 }
994
995 *RequiredPrefetchPixDataBWLuma = myPipe->DPPPerPlane
996 * (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData
997 * dml_ceil(BytePerPixelDETY, 1)
998 * *swath_width_luma_ub / LineTime;
999 *RequiredPrefetchPixDataBWChroma = myPipe->DPPPerPlane
1000 * (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData
1001 * dml_ceil(BytePerPixelDETC, 2)
1002 * *swath_width_chroma_ub / LineTime;
1003 } else {
1004 MyError = true;
1005 *VRatioPrefetchY = 0;
1006 *VRatioPrefetchC = 0;
1007 *RequiredPrefetchPixDataBWLuma = 0;
1008 *RequiredPrefetchPixDataBWChroma = 0;
1009 }
1010
1011 dml_print("DML: Tvm: %fus\n", TimeForFetchingMetaPTE);
1012 dml_print("DML: Tr0: %fus\n", TimeForFetchingRowInVBlank);
1013 dml_print("DML: Tsw: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime - TimeForFetchingMetaPTE - TimeForFetchingRowInVBlank);
1014 dml_print("DML: Tpre: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime);
1015 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1016
1017 } else {
1018 MyError = true;
1019 }
1020
1021 {
1022 double prefetch_vm_bw;
1023 double prefetch_row_bw;
1024
1025 if (PDEAndMetaPTEBytesFrame == 0) {
1026 prefetch_vm_bw = 0;
1027 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1028 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1029 } else {
1030 prefetch_vm_bw = 0;
1031 MyError = true;
1032 }
1033 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1034 prefetch_row_bw = 0;
1035 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1036 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1037 } else {
1038 prefetch_row_bw = 0;
1039 MyError = true;
1040 }
1041
1042 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1043 }
1044
1045 if (MyError) {
1046 *PrefetchBandwidth = 0;
1047 TimeForFetchingMetaPTE = 0;
1048 TimeForFetchingRowInVBlank = 0;
1049 *DestinationLinesToRequestVMInVBlank = 0;
1050 *DestinationLinesToRequestRowInVBlank = 0;
1051 *DestinationLinesForPrefetch = 0;
1052 LinesToRequestPrefetchPixelData = 0;
1053 *VRatioPrefetchY = 0;
1054 *VRatioPrefetchC = 0;
1055 *RequiredPrefetchPixDataBWLuma = 0;
1056 *RequiredPrefetchPixDataBWChroma = 0;
1057 }
1058
1059 return MyError;
1060 }
1061
/*
 * Snap Clock to a frequency of the form (4 * VCOSpeed) / N, where N is the
 * quotient (4 * VCOSpeed) / Clock passed through dml_floor(..., 1).
 *
 * NOTE(review): assuming dml_floor(x, 1) rounds x down to a whole number,
 * the result is >= Clock for positive inputs, and N becomes 0 (division by
 * zero) when Clock exceeds 4 * VCOSpeed -- confirm against callers.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double VCOTimesFour = VCOSpeed * 4;
	const double Divider = dml_floor(VCOTimesFour / Clock, 1);

	return VCOTimesFour / Divider;
}
1066
/*
 * Snap Clock to a frequency of the form (4 * VCOSpeed) / N, where N is the
 * quotient (4 * VCOSpeed) / Clock passed through dml_ceil(..., 1).
 *
 * NOTE(review): assuming dml_ceil(x, 1) rounds x up to a whole number, the
 * result is <= Clock for positive inputs -- confirm against callers.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double VCOTimesFour = VCOSpeed * 4;
	const double Divider = dml_ceil(VCOTimesFour / Clock, 1);

	return VCOTimesFour / Divider;
}
1071
/*
 * Pick the DCC request mode for a surface and report the block sizes that go
 * with it.
 *
 * The request mode is chosen as follows:
 *  - DCC disabled: no request mode.
 *  - Scan direction assumed unknown: decided by comparing DETBufferSize
 *    against byte counts derived from the viewport and RequestHeight256Byte,
 *    together with BytePerPixel and TilingFormat.
 *  - Scan direction known: decided by BytePerPixel (1, 2, 4 or 8),
 *    ScanOrientation, SwathHeight and, for 8 bytes per pixel, TilingFormat.
 *    Any other BytePerPixel leaves no request mode selected.
 *
 * Outputs (always written):
 *  *MaxUncompressedBlock, *MaxCompressedBlock, *Independent64ByteBlock
 *    256-byte requests:               256 / 256 / false
 *    contiguous 128-byte requests:    128 / 128 / false
 *    non-contiguous 128-byte requests: 256 / 64 / true
 *    no request mode:                 0 / 0 / 0
 *
 * Returns the maximum DCC compression value for the surface: 4.0, 2.0, 4.0
 * and 0.0 respectively for the four cases above.
 */
static double CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		unsigned int ViewportWidth,
		unsigned int ViewportHeight,
		double DETBufferSize,
		unsigned int RequestHeight256Byte,
		unsigned int SwathHeight,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixel,
		enum scan_direction_class ScanOrientation,
		unsigned int *MaxUncompressedBlock,
		unsigned int *MaxCompressedBlock,
		unsigned int *Independent64ByteBlock)
{
	enum {
		REQ_256Bytes,
		REQ_128BytesNonContiguous,
		REQ_128BytesContiguous,
		REQ_NA
	} Request = REQ_NA;
	const bool IsVertical = (ScanOrientation == dm_vert);
	/* The same set of swizzle modes is tested in both decision paths. */
	const bool IsDOrRxTiling =
			TilingFormat == dm_sw_4kb_d || TilingFormat == dm_sw_4kb_d_x
			|| TilingFormat == dm_sw_var_d || TilingFormat == dm_sw_var_d_x
			|| TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
			|| TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x;
	const unsigned int WidthRequestBytes =
			RequestHeight256Byte * ViewportWidth * BytePerPixel;
	double MaximumDCCCompressionSurface;

	if (DCCEnabled && DCCProgrammingAssumesScanDirectionUnknown) {
		/*
		 * The height term divides by RequestHeight256Byte, so it is left
		 * inside the conditions and only evaluated where short-circuiting
		 * reaches it.
		 */
		if (DETBufferSize >= WidthRequestBytes
				&& DETBufferSize >= 256 / RequestHeight256Byte * ViewportHeight)
			Request = REQ_256Bytes;
		else if (DETBufferSize < WidthRequestBytes
				&& (BytePerPixel == 2 || BytePerPixel == 4))
			Request = REQ_128BytesNonContiguous;
		else if (DETBufferSize < 256 / RequestHeight256Byte * ViewportHeight
				&& BytePerPixel == 8 && IsDOrRxTiling)
			Request = REQ_128BytesNonContiguous;
		else
			Request = REQ_128BytesContiguous;
	} else if (DCCEnabled) {
		switch (BytePerPixel) {
		case 1:
			Request = (IsVertical || SwathHeight == 16) ?
					REQ_256Bytes : REQ_128BytesContiguous;
			break;
		case 2:
			if (SwathHeight == (IsVertical ? 16U : 8U))
				Request = REQ_256Bytes;
			else
				Request = IsVertical ?
						REQ_128BytesContiguous : REQ_128BytesNonContiguous;
			break;
		case 4:
			if (SwathHeight == 8)
				Request = REQ_256Bytes;
			else
				Request = IsVertical ?
						REQ_128BytesContiguous : REQ_128BytesNonContiguous;
			break;
		case 8:
			if (IsDOrRxTiling) {
				if (SwathHeight == (IsVertical ? 8U : 4U))
					Request = REQ_256Bytes;
				else
					Request = IsVertical ?
							REQ_128BytesNonContiguous : REQ_128BytesContiguous;
			} else {
				Request = (!IsVertical || SwathHeight == 8) ?
						REQ_256Bytes : REQ_128BytesContiguous;
			}
			break;
		default:
			/* Other pixel sizes keep REQ_NA. */
			break;
		}
	}

	switch (Request) {
	case REQ_256Bytes:
		*MaxUncompressedBlock = 256;
		*MaxCompressedBlock = 256;
		*Independent64ByteBlock = false;
		MaximumDCCCompressionSurface = 4.0;
		break;
	case REQ_128BytesContiguous:
		*MaxUncompressedBlock = 128;
		*MaxCompressedBlock = 128;
		*Independent64ByteBlock = false;
		MaximumDCCCompressionSurface = 2.0;
		break;
	case REQ_128BytesNonContiguous:
		*MaxUncompressedBlock = 256;
		*MaxCompressedBlock = 64;
		*Independent64ByteBlock = true;
		MaximumDCCCompressionSurface = 4.0;
		break;
	default:
		*MaxUncompressedBlock = 0;
		*MaxCompressedBlock = 0;
		*Independent64ByteBlock = 0;
		MaximumDCCCompressionSurface = 0.0;
		break;
	}

	return MaximumDCCCompressionSurface;
}
1205
/*
 * Compute the number of source lines to prefetch for one plane.
 *
 * *VInitPreFill receives dml_floor((VRatio + vtaps + 1 [+ Interlace * 0.5 *
 * VRatio]) / 2, 1); the bracketed term is only added when
 * ProgressiveToInterlaceUnitInOPP is false.
 *
 * *MaxNumSwath and the partial-swath line count are then derived from that
 * value and SwathHeight. When mode_lib->vba.IgnoreViewportPositioning is set
 * a different formula is used for both, and a warning is printed if
 * ViewportYStart is non-zero; otherwise the partial-swath count is clamped to
 * at least 1.
 *
 * Returns *MaxNumSwath * SwathHeight + partial-swath lines.
 */
static double CalculatePrefetchSourceLines(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double vtaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		unsigned int ViewportYStart,
		double *VInitPreFill,
		unsigned int *MaxNumSwath)
{
	unsigned int PartialSwathLines;
	double PreFillLines;

	PreFillLines = dml_floor(
			(ProgressiveToInterlaceUnitInOPP ?
					VRatio + vtaps + 1 :
					VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0,
			1);
	*VInitPreFill = PreFillLines;

	if (mode_lib->vba.IgnoreViewportPositioning) {
		if (ViewportYStart != 0)
			dml_print(
					"WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");

		*MaxNumSwath = dml_ceil(PreFillLines / SwathHeight, 1);

		if (PreFillLines > 1.0)
			PartialSwathLines = (unsigned int) (PreFillLines - 1) % SwathHeight;
		else
			PartialSwathLines = (unsigned int) (PreFillLines + SwathHeight - 1)
					% SwathHeight;
	} else {
		*MaxNumSwath = dml_ceil((PreFillLines - 1.0) / SwathHeight, 1) + 1.0;

		if (PreFillLines > 1.0)
			PartialSwathLines = (unsigned int) (PreFillLines - 2) % SwathHeight;
		else
			PartialSwathLines = (unsigned int) (PreFillLines + SwathHeight - 2)
					% SwathHeight;
		/* Only this path clamps the partial swath to at least one line. */
		PartialSwathLines = dml_max(1U, PartialSwathLines);
	}

	return *MaxNumSwath * SwathHeight + PartialSwathLines;
}
1252
/*
 * Compute the per-frame page-table / DCC-meta byte counts and the per-row
 * DCC-meta and PTE byte counts for one surface.
 *
 * Outputs written through pointers:
 *  - MacroTileWidth, MetaPTEBytesFrame, DPDE0BytesFrame, MetaRowByte,
 *    PixelPTEBytesPerRow, PTEBufferSizeNotExceeded, vm_group_bytes and
 *    dpte_group_bytes are written on every call.
 *  - MetaRequestWidth, MetaRequestHeight, meta_row_width and meta_row_height
 *    are written only when DCCEnable is true.
 *  - PixelPTEReqWidth, PixelPTEReqHeight, PTERequestSize, dpte_row_height and
 *    dpte_row_width_ub are written only when GPUVMEnable is true.
 *
 * SourcePixelFormat is not used by this function. The page-table level
 * counts for the MPDE / extra-DPDE terms are read from mode_lib->vba, while
 * the HostVM scaling factor uses the HostVMMaxPageTableLevels and
 * HostVMCachedPageTableLevels parameters.
 *
 * Returns PDEAndMetaPTEBytesFrame: the sum of *MetaPTEBytesFrame, the MPDE
 * bytes, *DPDE0BytesFrame and the extra DPDE bytes, scaled by
 * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)) when
 * HostVMEnable is true.
 */
static unsigned int CalculateVMAndRowBytes(
		struct display_mode_lib *mode_lib,
		bool DCCEnable,
		unsigned int BlockHeight256Bytes,
		unsigned int BlockWidth256Bytes,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceTiling,
		unsigned int BytePerPixel,
		enum scan_direction_class ScanDirection,
		unsigned int ViewportWidth,
		unsigned int ViewportHeight,
		unsigned int SwathWidth,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxPageTableLevels,
		unsigned int HostVMCachedPageTableLevels,
		unsigned int VMMPageSize,
		unsigned int PTEBufferSizeInRequests,
		unsigned int Pitch,
		unsigned int DCCMetaPitch,
		unsigned int *MacroTileWidth,
		unsigned int *MetaRowByte,
		unsigned int *PixelPTEBytesPerRow,
		bool *PTEBufferSizeNotExceeded,
		unsigned int *dpte_row_width_ub,
		unsigned int *dpte_row_height,
		unsigned int *MetaRequestWidth,
		unsigned int *MetaRequestHeight,
		unsigned int *meta_row_width,
		unsigned int *meta_row_height,
		unsigned int *vm_group_bytes,
		unsigned int *dpte_group_bytes,
		unsigned int *PixelPTEReqWidth,
		unsigned int *PixelPTEReqHeight,
		unsigned int *PTERequestSize,
		unsigned int *DPDE0BytesFrame,
		unsigned int *MetaPTEBytesFrame)
{
	unsigned int MPDEBytesFrame;
	unsigned int DCCMetaSurfaceBytes;
	unsigned int MacroTileSizeBytes;
	unsigned int MacroTileHeight;
	unsigned int ExtraDPDEBytesFrame;
	unsigned int PDEAndMetaPTEBytesFrame;
	unsigned int PixelPTEReqHeightPTEs = 0;

	/* DCC meta request / row geometry and per-frame meta PTE bytes. */
	if (DCCEnable == true) {
		*MetaRequestHeight = 8 * BlockHeight256Bytes;
		*MetaRequestWidth = 8 * BlockWidth256Bytes;
		if (ScanDirection == dm_horz) {
			*meta_row_height = *MetaRequestHeight;
			*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
					+ *MetaRequestWidth;
			*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
		} else {
			/* Non-horizontal scan swaps the roles of request width and height. */
			*meta_row_height = *MetaRequestWidth;
			*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
					+ *MetaRequestHeight;
			*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
		}
		/*
		 * NOTE(review): both branches below evaluate the same expression;
		 * they differ only in the (double) cast applied to ViewportHeight
		 * before the subtraction.
		 */
		if (ScanDirection == dm_horz) {
			DCCMetaSurfaceBytes = DCCMetaPitch
					* (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
							+ 64 * BlockHeight256Bytes) * BytePerPixel
					/ 256;
		} else {
			DCCMetaSurfaceBytes = DCCMetaPitch
					* (dml_ceil(
							(double) ViewportHeight - 1,
							64 * BlockHeight256Bytes)
							+ 64 * BlockHeight256Bytes) * BytePerPixel
					/ 256;
		}
		if (GPUVMEnable == true) {
			*MetaPTEBytesFrame = (dml_ceil(
					(double) (DCCMetaSurfaceBytes - VMMPageSize)
							/ (8 * VMMPageSize),
					1) + 1) * 64;
			MPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 2);
		} else {
			*MetaPTEBytesFrame = 0;
			MPDEBytesFrame = 0;
		}
	} else {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
		*MetaRowByte = 0;
	}

	/* Macro tile size and height are selected by the swizzle mode. */
	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
		MacroTileSizeBytes = 256;
		MacroTileHeight = BlockHeight256Bytes;
	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
			|| SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) {
		MacroTileSizeBytes = 4096;
		MacroTileHeight = 4 * BlockHeight256Bytes;
	} else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t
			|| SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d
			|| SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x
			|| SurfaceTiling == dm_sw_64kb_r_x) {
		MacroTileSizeBytes = 65536;
		MacroTileHeight = 16 * BlockHeight256Bytes;
	} else {
		MacroTileSizeBytes = 262144;
		MacroTileHeight = 32 * BlockHeight256Bytes;
	}
	*MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;

	/* DPDE bytes are only counted when the combined level product exceeds 2. */
	if (GPUVMEnable == true && (mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) > 2) {
		if (ScanDirection == dm_horz) {
			*DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
		} else {
			*DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
		}
		ExtraDPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 3);
	} else {
		*DPDE0BytesFrame = 0;
		ExtraDPDEBytesFrame = 0;
	}

	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
			+ ExtraDPDEBytesFrame;

	if (HostVMEnable == true) {
		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
	}

	if (GPUVMEnable == true) {
		double FractionOfPTEReturnDrop;

		/* PTE request shape depends on tiling, macro tile size and page size. */
		if (SurfaceTiling == dm_sw_linear) {
			PixelPTEReqHeightPTEs = 1;
			*PixelPTEReqHeight = 1;
			*PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel;
			*PTERequestSize = 64;
			FractionOfPTEReturnDrop = 0;
		} else if (MacroTileSizeBytes == 4096) {
			PixelPTEReqHeightPTEs = 1;
			*PixelPTEReqHeight = MacroTileHeight;
			*PixelPTEReqWidth = 8 * *MacroTileWidth;
			*PTERequestSize = 64;
			if (ScanDirection == dm_horz)
				FractionOfPTEReturnDrop = 0;
			else
				/*
				 * Must be a floating-point division: the integer
				 * expression 7 / 8 evaluates to 0 and would silently
				 * disable the drop fraction in the buffer check below.
				 */
				FractionOfPTEReturnDrop = 7.0 / 8.0;
		} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
			PixelPTEReqHeightPTEs = 16;
			*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
			*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
			*PTERequestSize = 128;
			FractionOfPTEReturnDrop = 0;
		} else {
			PixelPTEReqHeightPTEs = 1;
			*PixelPTEReqHeight = MacroTileHeight;
			*PixelPTEReqWidth = 8 * *MacroTileWidth;
			*PTERequestSize = 64;
			FractionOfPTEReturnDrop = 0;
		}

		/* DPTE row geometry and the PTE bytes needed per row. */
		if (SurfaceTiling == dm_sw_linear) {
			*dpte_row_height = dml_min(128,
					1 << (unsigned int) dml_floor(
							dml_log2(
									(double) PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch),
							1));
			*dpte_row_width_ub = (dml_ceil((double) (Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
		} else if (ScanDirection == dm_horz) {
			*dpte_row_height = *PixelPTEReqHeight;
			*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
		} else {
			*dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
			*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
		}
		/* Only the fraction of PTE bytes that is not dropped must fit the buffer. */
		if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
				<= 64 * PTEBufferSizeInRequests) {
			*PTEBufferSizeNotExceeded = true;
		} else {
			*PTEBufferSizeNotExceeded = false;
		}
	} else {
		*PixelPTEBytesPerRow = 0;
		*PTEBufferSizeNotExceeded = true;
	}
	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %d\n", *MetaPTEBytesFrame);

	/* The HostVM scaling is applied after the buffer-size check above. */
	if (HostVMEnable == true) {
		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
	}

	if (HostVMEnable == true) {
		*vm_group_bytes = 512;
		*dpte_group_bytes = 512;
	} else if (GPUVMEnable == true) {
		*vm_group_bytes = 2048;
		if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection != dm_horz) {
			*dpte_group_bytes = 512;
		} else {
			*dpte_group_bytes = 2048;
		}
	} else {
		*vm_group_bytes = 0;
		*dpte_group_bytes = 0;
	}

	return PDEAndMetaPTEBytesFrame;
}
1462
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1463 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1464 struct display_mode_lib *mode_lib)
1465 {
1466 struct vba_vars_st *locals = &mode_lib->vba;
1467 unsigned int j, k;
1468
1469 mode_lib->vba.WritebackDISPCLK = 0.0;
1470 mode_lib->vba.DISPCLKWithRamping = 0;
1471 mode_lib->vba.DISPCLKWithoutRamping = 0;
1472 mode_lib->vba.GlobalDPPCLK = 0.0;
1473
1474 // DISPCLK and DPPCLK Calculation
1475 //
1476 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1477 if (mode_lib->vba.WritebackEnable[k]) {
1478 mode_lib->vba.WritebackDISPCLK =
1479 dml_max(
1480 mode_lib->vba.WritebackDISPCLK,
1481 CalculateWriteBackDISPCLK(
1482 mode_lib->vba.WritebackPixelFormat[k],
1483 mode_lib->vba.PixelClock[k],
1484 mode_lib->vba.WritebackHRatio[k],
1485 mode_lib->vba.WritebackVRatio[k],
1486 mode_lib->vba.WritebackLumaHTaps[k],
1487 mode_lib->vba.WritebackLumaVTaps[k],
1488 mode_lib->vba.WritebackChromaHTaps[k],
1489 mode_lib->vba.WritebackChromaVTaps[k],
1490 mode_lib->vba.WritebackDestinationWidth[k],
1491 mode_lib->vba.HTotal[k],
1492 mode_lib->vba.WritebackChromaLineBufferWidth));
1493 }
1494 }
1495
1496 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1497 if (mode_lib->vba.HRatio[k] > 1) {
1498 locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1499 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1500 mode_lib->vba.MaxPSCLToLBThroughput
1501 * mode_lib->vba.HRatio[k]
1502 / dml_ceil(
1503 mode_lib->vba.htaps[k]
1504 / 6.0,
1505 1));
1506 } else {
1507 locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1508 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1509 mode_lib->vba.MaxPSCLToLBThroughput);
1510 }
1511
1512 mode_lib->vba.DPPCLKUsingSingleDPPLuma =
1513 mode_lib->vba.PixelClock[k]
1514 * dml_max(
1515 mode_lib->vba.vtaps[k] / 6.0
1516 * dml_min(
1517 1.0,
1518 mode_lib->vba.HRatio[k]),
1519 dml_max(
1520 mode_lib->vba.HRatio[k]
1521 * mode_lib->vba.VRatio[k]
1522 / locals->PSCL_THROUGHPUT_LUMA[k],
1523 1.0));
1524
1525 if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6)
1526 && mode_lib->vba.DPPCLKUsingSingleDPPLuma
1527 < 2 * mode_lib->vba.PixelClock[k]) {
1528 mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k];
1529 }
1530
1531 if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
1532 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
1533 locals->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1534 locals->DPPCLKUsingSingleDPP[k] =
1535 mode_lib->vba.DPPCLKUsingSingleDPPLuma;
1536 } else {
1537 if (mode_lib->vba.HRatio[k] > 1) {
1538 locals->PSCL_THROUGHPUT_CHROMA[k] =
1539 dml_min(
1540 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1541 mode_lib->vba.MaxPSCLToLBThroughput
1542 * mode_lib->vba.HRatio[k]
1543 / 2
1544 / dml_ceil(
1545 mode_lib->vba.HTAPsChroma[k]
1546 / 6.0,
1547 1.0));
1548 } else {
1549 locals->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1550 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1551 mode_lib->vba.MaxPSCLToLBThroughput);
1552 }
1553 mode_lib->vba.DPPCLKUsingSingleDPPChroma =
1554 mode_lib->vba.PixelClock[k]
1555 * dml_max(
1556 mode_lib->vba.VTAPsChroma[k]
1557 / 6.0
1558 * dml_min(
1559 1.0,
1560 mode_lib->vba.HRatio[k]
1561 / 2),
1562 dml_max(
1563 mode_lib->vba.HRatio[k]
1564 * mode_lib->vba.VRatio[k]
1565 / 4
1566 / locals->PSCL_THROUGHPUT_CHROMA[k],
1567 1.0));
1568
1569 if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6)
1570 && mode_lib->vba.DPPCLKUsingSingleDPPChroma
1571 < 2 * mode_lib->vba.PixelClock[k]) {
1572 mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2
1573 * mode_lib->vba.PixelClock[k];
1574 }
1575
1576 locals->DPPCLKUsingSingleDPP[k] = dml_max(
1577 mode_lib->vba.DPPCLKUsingSingleDPPLuma,
1578 mode_lib->vba.DPPCLKUsingSingleDPPChroma);
1579 }
1580 }
1581
1582 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1583 if (mode_lib->vba.BlendingAndTiming[k] != k)
1584 continue;
1585 if (mode_lib->vba.ODMCombineEnabled[k]) {
1586 mode_lib->vba.DISPCLKWithRamping =
1587 dml_max(
1588 mode_lib->vba.DISPCLKWithRamping,
1589 mode_lib->vba.PixelClock[k] / 2
1590 * (1
1591 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1592 / 100)
1593 * (1
1594 + mode_lib->vba.DISPCLKRampingMargin
1595 / 100));
1596 mode_lib->vba.DISPCLKWithoutRamping =
1597 dml_max(
1598 mode_lib->vba.DISPCLKWithoutRamping,
1599 mode_lib->vba.PixelClock[k] / 2
1600 * (1
1601 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1602 / 100));
1603 } else if (!mode_lib->vba.ODMCombineEnabled[k]) {
1604 mode_lib->vba.DISPCLKWithRamping =
1605 dml_max(
1606 mode_lib->vba.DISPCLKWithRamping,
1607 mode_lib->vba.PixelClock[k]
1608 * (1
1609 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1610 / 100)
1611 * (1
1612 + mode_lib->vba.DISPCLKRampingMargin
1613 / 100));
1614 mode_lib->vba.DISPCLKWithoutRamping =
1615 dml_max(
1616 mode_lib->vba.DISPCLKWithoutRamping,
1617 mode_lib->vba.PixelClock[k]
1618 * (1
1619 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1620 / 100));
1621 }
1622 }
1623
1624 mode_lib->vba.DISPCLKWithRamping = dml_max(
1625 mode_lib->vba.DISPCLKWithRamping,
1626 mode_lib->vba.WritebackDISPCLK);
1627 mode_lib->vba.DISPCLKWithoutRamping = dml_max(
1628 mode_lib->vba.DISPCLKWithoutRamping,
1629 mode_lib->vba.WritebackDISPCLK);
1630
1631 ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0);
1632 mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1633 mode_lib->vba.DISPCLKWithRamping,
1634 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1635 mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1636 mode_lib->vba.DISPCLKWithoutRamping,
1637 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1638 mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
1639 mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states - 1].dispclk_mhz,
1640 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1641 if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity
1642 > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
1643 mode_lib->vba.DISPCLK_calculated =
1644 mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity;
1645 } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity
1646 > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
1647 mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity;
1648 } else {
1649 mode_lib->vba.DISPCLK_calculated =
1650 mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity;
1651 }
1652 DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated);
1653
1654 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1655 mode_lib->vba.DPPCLK_calculated[k] = locals->DPPCLKUsingSingleDPP[k]
1656 / mode_lib->vba.DPPPerPlane[k]
1657 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1658 mode_lib->vba.GlobalDPPCLK = dml_max(
1659 mode_lib->vba.GlobalDPPCLK,
1660 mode_lib->vba.DPPCLK_calculated[k]);
1661 }
1662 mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp(
1663 mode_lib->vba.GlobalDPPCLK,
1664 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1665 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1666 mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255
1667 * dml_ceil(
1668 mode_lib->vba.DPPCLK_calculated[k] * 255
1669 / mode_lib->vba.GlobalDPPCLK,
1670 1);
1671 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]);
1672 }
1673
1674 // Urgent and NB P-State/DRAM Clock Change Watermark
1675 DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK);
1676 DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN);
1677 DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW);
1678
1679 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1680 bool MainPlaneDoesODMCombine = false;
1681
1682 if (mode_lib->vba.SourceScan[k] == dm_horz)
1683 locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k];
1684 else
1685 locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
1686
1687 if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
1688 MainPlaneDoesODMCombine = true;
1689 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
1690 if (mode_lib->vba.BlendingAndTiming[k] == j
1691 && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
1692 MainPlaneDoesODMCombine = true;
1693
1694 if (MainPlaneDoesODMCombine == true)
1695 locals->SwathWidthY[k] = dml_min(
1696 (double) locals->SwathWidthSingleDPPY[k],
1697 dml_round(
1698 mode_lib->vba.HActive[k] / 2.0
1699 * mode_lib->vba.HRatio[k]));
1700 else
1701 locals->SwathWidthY[k] = locals->SwathWidthSingleDPPY[k]
1702 / mode_lib->vba.DPPPerPlane[k];
1703 }
1704
1705 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1706 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
1707 locals->BytePerPixelDETY[k] = 8;
1708 locals->BytePerPixelDETC[k] = 0;
1709 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
1710 locals->BytePerPixelDETY[k] = 4;
1711 locals->BytePerPixelDETC[k] = 0;
1712 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
1713 locals->BytePerPixelDETY[k] = 2;
1714 locals->BytePerPixelDETC[k] = 0;
1715 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
1716 locals->BytePerPixelDETY[k] = 1;
1717 locals->BytePerPixelDETC[k] = 0;
1718 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
1719 locals->BytePerPixelDETY[k] = 1;
1720 locals->BytePerPixelDETC[k] = 2;
1721 } else { // dm_420_10
1722 locals->BytePerPixelDETY[k] = 4.0 / 3.0;
1723 locals->BytePerPixelDETC[k] = 8.0 / 3.0;
1724 }
1725 }
1726
1727 mode_lib->vba.TotalDataReadBandwidth = 0.0;
1728 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1729 locals->ReadBandwidthPlaneLuma[k] = locals->SwathWidthSingleDPPY[k]
1730 * dml_ceil(locals->BytePerPixelDETY[k], 1)
1731 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
1732 * mode_lib->vba.VRatio[k];
1733 locals->ReadBandwidthPlaneChroma[k] = locals->SwathWidthSingleDPPY[k]
1734 / 2 * dml_ceil(locals->BytePerPixelDETC[k], 2)
1735 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
1736 * mode_lib->vba.VRatio[k] / 2;
1737 DTRACE(
1738 " read_bw[%i] = %fBps",
1739 k,
1740 locals->ReadBandwidthPlaneLuma[k]
1741 + locals->ReadBandwidthPlaneChroma[k]);
1742 mode_lib->vba.TotalDataReadBandwidth += locals->ReadBandwidthPlaneLuma[k]
1743 + locals->ReadBandwidthPlaneChroma[k];
1744 }
1745
1746 // DCFCLK Deep Sleep
1747 CalculateDCFCLKDeepSleep(
1748 mode_lib,
1749 mode_lib->vba.NumberOfActivePlanes,
1750 locals->BytePerPixelDETY,
1751 locals->BytePerPixelDETC,
1752 mode_lib->vba.VRatio,
1753 locals->SwathWidthY,
1754 mode_lib->vba.DPPPerPlane,
1755 mode_lib->vba.HRatio,
1756 mode_lib->vba.PixelClock,
1757 locals->PSCL_THROUGHPUT_LUMA,
1758 locals->PSCL_THROUGHPUT_CHROMA,
1759 locals->DPPCLK,
1760 &mode_lib->vba.DCFCLKDeepSleep);
1761
1762 // DSCCLK
1763 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1764 if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
1765 locals->DSCCLK_calculated[k] = 0.0;
1766 } else {
1767 if (mode_lib->vba.OutputFormat[k] == dm_420
1768 || mode_lib->vba.OutputFormat[k] == dm_n422)
1769 mode_lib->vba.DSCFormatFactor = 2;
1770 else
1771 mode_lib->vba.DSCFormatFactor = 1;
1772 if (mode_lib->vba.ODMCombineEnabled[k])
1773 locals->DSCCLK_calculated[k] =
1774 mode_lib->vba.PixelClockBackEnd[k] / 6
1775 / mode_lib->vba.DSCFormatFactor
1776 / (1
1777 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1778 / 100);
1779 else
1780 locals->DSCCLK_calculated[k] =
1781 mode_lib->vba.PixelClockBackEnd[k] / 3
1782 / mode_lib->vba.DSCFormatFactor
1783 / (1
1784 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1785 / 100);
1786 }
1787 }
1788
1789 // DSC Delay
1790 // TODO
1791 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1792 double bpp = mode_lib->vba.OutputBpp[k];
1793 unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k];
1794
1795 if (mode_lib->vba.DSCEnabled[k] && bpp != 0) {
1796 if (!mode_lib->vba.ODMCombineEnabled[k]) {
1797 locals->DSCDelay[k] =
1798 dscceComputeDelay(
1799 mode_lib->vba.DSCInputBitPerComponent[k],
1800 bpp,
1801 dml_ceil(
1802 (double) mode_lib->vba.HActive[k]
1803 / mode_lib->vba.NumberOfDSCSlices[k],
1804 1),
1805 slices,
1806 mode_lib->vba.OutputFormat[k])
1807 + dscComputeDelay(
1808 mode_lib->vba.OutputFormat[k]);
1809 } else {
1810 locals->DSCDelay[k] =
1811 2
1812 * (dscceComputeDelay(
1813 mode_lib->vba.DSCInputBitPerComponent[k],
1814 bpp,
1815 dml_ceil(
1816 (double) mode_lib->vba.HActive[k]
1817 / mode_lib->vba.NumberOfDSCSlices[k],
1818 1),
1819 slices / 2.0,
1820 mode_lib->vba.OutputFormat[k])
1821 + dscComputeDelay(
1822 mode_lib->vba.OutputFormat[k]));
1823 }
1824 locals->DSCDelay[k] = locals->DSCDelay[k]
1825 * mode_lib->vba.PixelClock[k]
1826 / mode_lib->vba.PixelClockBackEnd[k];
1827 } else {
1828 locals->DSCDelay[k] = 0;
1829 }
1830 }
1831
1832 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
1833 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes
1834 if (j != k && mode_lib->vba.BlendingAndTiming[k] == j
1835 && mode_lib->vba.DSCEnabled[j])
1836 locals->DSCDelay[k] = locals->DSCDelay[j];
1837
1838 // Prefetch
1839 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1840 unsigned int PDEAndMetaPTEBytesFrameY;
1841 unsigned int PixelPTEBytesPerRowY;
1842 unsigned int MetaRowByteY;
1843 unsigned int MetaRowByteC;
1844 unsigned int PDEAndMetaPTEBytesFrameC;
1845 unsigned int PixelPTEBytesPerRowC;
1846 bool PTEBufferSizeNotExceededY;
1847 bool PTEBufferSizeNotExceededC;
1848
1849 Calculate256BBlockSizes(
1850 mode_lib->vba.SourcePixelFormat[k],
1851 mode_lib->vba.SurfaceTiling[k],
1852 dml_ceil(locals->BytePerPixelDETY[k], 1),
1853 dml_ceil(locals->BytePerPixelDETC[k], 2),
1854 &locals->BlockHeight256BytesY[k],
1855 &locals->BlockHeight256BytesC[k],
1856 &locals->BlockWidth256BytesY[k],
1857 &locals->BlockWidth256BytesC[k]);
1858
1859 locals->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
1860 mode_lib,
1861 mode_lib->vba.VRatio[k],
1862 mode_lib->vba.vtaps[k],
1863 mode_lib->vba.Interlace[k],
1864 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
1865 mode_lib->vba.SwathHeightY[k],
1866 mode_lib->vba.ViewportYStartY[k],
1867 &locals->VInitPreFillY[k],
1868 &locals->MaxNumSwathY[k]);
1869
1870 if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
1871 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
1872 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
1873 && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) {
1874 PDEAndMetaPTEBytesFrameC =
1875 CalculateVMAndRowBytes(
1876 mode_lib,
1877 mode_lib->vba.DCCEnable[k],
1878 locals->BlockHeight256BytesC[k],
1879 locals->BlockWidth256BytesC[k],
1880 mode_lib->vba.SourcePixelFormat[k],
1881 mode_lib->vba.SurfaceTiling[k],
1882 dml_ceil(
1883 locals->BytePerPixelDETC[k],
1884 2),
1885 mode_lib->vba.SourceScan[k],
1886 mode_lib->vba.ViewportWidth[k] / 2,
1887 mode_lib->vba.ViewportHeight[k] / 2,
1888 locals->SwathWidthY[k] / 2,
1889 mode_lib->vba.GPUVMEnable,
1890 mode_lib->vba.HostVMEnable,
1891 mode_lib->vba.HostVMMaxPageTableLevels,
1892 mode_lib->vba.HostVMCachedPageTableLevels,
1893 mode_lib->vba.VMMPageSize,
1894 mode_lib->vba.PTEBufferSizeInRequestsChroma,
1895 mode_lib->vba.PitchC[k],
1896 mode_lib->vba.DCCMetaPitchC[k],
1897 &locals->MacroTileWidthC[k],
1898 &MetaRowByteC,
1899 &PixelPTEBytesPerRowC,
1900 &PTEBufferSizeNotExceededC,
1901 &locals->dpte_row_width_chroma_ub[k],
1902 &locals->dpte_row_height_chroma[k],
1903 &locals->meta_req_width_chroma[k],
1904 &locals->meta_req_height_chroma[k],
1905 &locals->meta_row_width_chroma[k],
1906 &locals->meta_row_height_chroma[k],
1907 &locals->vm_group_bytes_chroma,
1908 &locals->dpte_group_bytes_chroma,
1909 &locals->PixelPTEReqWidthC[k],
1910 &locals->PixelPTEReqHeightC[k],
1911 &locals->PTERequestSizeC[k],
1912 &locals->dpde0_bytes_per_frame_ub_c[k],
1913 &locals->meta_pte_bytes_per_frame_ub_c[k]);
1914
1915 locals->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
1916 mode_lib,
1917 mode_lib->vba.VRatio[k] / 2,
1918 mode_lib->vba.VTAPsChroma[k],
1919 mode_lib->vba.Interlace[k],
1920 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
1921 mode_lib->vba.SwathHeightC[k],
1922 mode_lib->vba.ViewportYStartC[k],
1923 &locals->VInitPreFillC[k],
1924 &locals->MaxNumSwathC[k]);
1925 } else {
1926 PixelPTEBytesPerRowC = 0;
1927 PDEAndMetaPTEBytesFrameC = 0;
1928 MetaRowByteC = 0;
1929 locals->MaxNumSwathC[k] = 0;
1930 locals->PrefetchSourceLinesC[k] = 0;
1931 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
1932 }
1933
1934 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
1935 mode_lib,
1936 mode_lib->vba.DCCEnable[k],
1937 locals->BlockHeight256BytesY[k],
1938 locals->BlockWidth256BytesY[k],
1939 mode_lib->vba.SourcePixelFormat[k],
1940 mode_lib->vba.SurfaceTiling[k],
1941 dml_ceil(locals->BytePerPixelDETY[k], 1),
1942 mode_lib->vba.SourceScan[k],
1943 mode_lib->vba.ViewportWidth[k],
1944 mode_lib->vba.ViewportHeight[k],
1945 locals->SwathWidthY[k],
1946 mode_lib->vba.GPUVMEnable,
1947 mode_lib->vba.HostVMEnable,
1948 mode_lib->vba.HostVMMaxPageTableLevels,
1949 mode_lib->vba.HostVMCachedPageTableLevels,
1950 mode_lib->vba.VMMPageSize,
1951 locals->PTEBufferSizeInRequestsForLuma,
1952 mode_lib->vba.PitchY[k],
1953 mode_lib->vba.DCCMetaPitchY[k],
1954 &locals->MacroTileWidthY[k],
1955 &MetaRowByteY,
1956 &PixelPTEBytesPerRowY,
1957 &PTEBufferSizeNotExceededY,
1958 &locals->dpte_row_width_luma_ub[k],
1959 &locals->dpte_row_height[k],
1960 &locals->meta_req_width[k],
1961 &locals->meta_req_height[k],
1962 &locals->meta_row_width[k],
1963 &locals->meta_row_height[k],
1964 &locals->vm_group_bytes[k],
1965 &locals->dpte_group_bytes[k],
1966 &locals->PixelPTEReqWidthY[k],
1967 &locals->PixelPTEReqHeightY[k],
1968 &locals->PTERequestSizeY[k],
1969 &locals->dpde0_bytes_per_frame_ub_l[k],
1970 &locals->meta_pte_bytes_per_frame_ub_l[k]);
1971
1972 locals->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
1973 locals->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
1974 + PDEAndMetaPTEBytesFrameC;
1975 locals->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
1976
1977 CalculateActiveRowBandwidth(
1978 mode_lib->vba.GPUVMEnable,
1979 mode_lib->vba.SourcePixelFormat[k],
1980 mode_lib->vba.VRatio[k],
1981 mode_lib->vba.DCCEnable[k],
1982 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
1983 MetaRowByteY,
1984 MetaRowByteC,
1985 locals->meta_row_height[k],
1986 locals->meta_row_height_chroma[k],
1987 PixelPTEBytesPerRowY,
1988 PixelPTEBytesPerRowC,
1989 locals->dpte_row_height[k],
1990 locals->dpte_row_height_chroma[k],
1991 &locals->meta_row_bw[k],
1992 &locals->dpte_row_bw[k]);
1993 }
1994
1995 mode_lib->vba.TotalDCCActiveDPP = 0;
1996 mode_lib->vba.TotalActiveDPP = 0;
1997 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1998 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP
1999 + mode_lib->vba.DPPPerPlane[k];
2000 if (mode_lib->vba.DCCEnable[k])
2001 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
2002 + mode_lib->vba.DPPPerPlane[k];
2003 }
2004
2005 mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
2006 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2007 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2008 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
2009
2010 mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency =
2011 (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK
2012 + mode_lib->vba.UrgentOutOfOrderReturnPerChannel
2013 * mode_lib->vba.NumberOfChannels
2014 / mode_lib->vba.ReturnBW;
2015
2016 mode_lib->vba.UrgentExtraLatency = CalculateExtraLatency(
2017 mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency,
2018 mode_lib->vba.TotalActiveDPP,
2019 mode_lib->vba.PixelChunkSizeInKByte,
2020 mode_lib->vba.TotalDCCActiveDPP,
2021 mode_lib->vba.MetaChunkSize,
2022 mode_lib->vba.ReturnBW,
2023 mode_lib->vba.GPUVMEnable,
2024 mode_lib->vba.HostVMEnable,
2025 mode_lib->vba.NumberOfActivePlanes,
2026 mode_lib->vba.DPPPerPlane,
2027 locals->dpte_group_bytes,
2028 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2029 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2030 mode_lib->vba.HostVMMaxPageTableLevels,
2031 mode_lib->vba.HostVMCachedPageTableLevels);
2032
2033
2034 mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep;
2035
2036 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2037 if (mode_lib->vba.BlendingAndTiming[k] == k) {
2038 if (mode_lib->vba.WritebackEnable[k] == true) {
2039 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2040 mode_lib->vba.WritebackLatency
2041 + CalculateWriteBackDelay(
2042 mode_lib->vba.WritebackPixelFormat[k],
2043 mode_lib->vba.WritebackHRatio[k],
2044 mode_lib->vba.WritebackVRatio[k],
2045 mode_lib->vba.WritebackLumaHTaps[k],
2046 mode_lib->vba.WritebackLumaVTaps[k],
2047 mode_lib->vba.WritebackChromaHTaps[k],
2048 mode_lib->vba.WritebackChromaVTaps[k],
2049 mode_lib->vba.WritebackDestinationWidth[k])
2050 / mode_lib->vba.DISPCLK;
2051 } else
2052 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
2053 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
2054 if (mode_lib->vba.BlendingAndTiming[j] == k
2055 && mode_lib->vba.WritebackEnable[j] == true) {
2056 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2057 dml_max(
2058 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k],
2059 mode_lib->vba.WritebackLatency
2060 + CalculateWriteBackDelay(
2061 mode_lib->vba.WritebackPixelFormat[j],
2062 mode_lib->vba.WritebackHRatio[j],
2063 mode_lib->vba.WritebackVRatio[j],
2064 mode_lib->vba.WritebackLumaHTaps[j],
2065 mode_lib->vba.WritebackLumaVTaps[j],
2066 mode_lib->vba.WritebackChromaHTaps[j],
2067 mode_lib->vba.WritebackChromaVTaps[j],
2068 mode_lib->vba.WritebackDestinationWidth[j])
2069 / mode_lib->vba.DISPCLK);
2070 }
2071 }
2072 }
2073 }
2074
2075 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
2076 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
2077 if (mode_lib->vba.BlendingAndTiming[k] == j)
2078 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2079 locals->WritebackDelay[mode_lib->vba.VoltageLevel][j];
2080
2081 mode_lib->vba.VStartupLines = 13;
2082 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2083 locals->MaxVStartupLines[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1));
2084 }
2085
2086 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
2087 locals->MaximumMaxVStartupLines = dml_max(locals->MaximumMaxVStartupLines, locals->MaxVStartupLines[k]);
2088
2089 // We don't really care to iterate between the various prefetch modes
2090 //mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &mode_lib->vba.MinPrefetchMode, &mode_lib->vba.MaxPrefetchMode);
2091 mode_lib->vba.UrgentLatency = dml_max3(mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.UrgentLatencyPixelMixedWithVMData, mode_lib->vba.UrgentLatencyVMDataOnly);
2092
2093 do {
2094 double MaxTotalRDBandwidth = 0;
2095 double MaxTotalRDBandwidthNoUrgentBurst = 0;
2096 bool DestinationLineTimesForPrefetchLessThan2 = false;
2097 bool VRatioPrefetchMoreThan4 = false;
2098 double TWait = CalculateTWait(
2099 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2100 mode_lib->vba.DRAMClockChangeLatency,
2101 mode_lib->vba.UrgentLatency,
2102 mode_lib->vba.SREnterPlusExitTime);
2103
2104 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2105 Pipe myPipe;
2106 HostVM myHostVM;
2107
2108 if (mode_lib->vba.XFCEnabled[k] == true) {
2109 mode_lib->vba.XFCRemoteSurfaceFlipDelay =
2110 CalculateRemoteSurfaceFlipDelay(
2111 mode_lib,
2112 mode_lib->vba.VRatio[k],
2113 locals->SwathWidthY[k],
2114 dml_ceil(
2115 locals->BytePerPixelDETY[k],
2116 1),
2117 mode_lib->vba.HTotal[k]
2118 / mode_lib->vba.PixelClock[k],
2119 mode_lib->vba.XFCTSlvVupdateOffset,
2120 mode_lib->vba.XFCTSlvVupdateWidth,
2121 mode_lib->vba.XFCTSlvVreadyOffset,
2122 mode_lib->vba.XFCXBUFLatencyTolerance,
2123 mode_lib->vba.XFCFillBWOverhead,
2124 mode_lib->vba.XFCSlvChunkSize,
2125 mode_lib->vba.XFCBusTransportTime,
2126 mode_lib->vba.TCalc,
2127 TWait,
2128 &mode_lib->vba.SrcActiveDrainRate,
2129 &mode_lib->vba.TInitXFill,
2130 &mode_lib->vba.TslvChk);
2131 } else {
2132 mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0;
2133 }
2134
2135 myPipe.DPPCLK = locals->DPPCLK[k];
2136 myPipe.DISPCLK = mode_lib->vba.DISPCLK;
2137 myPipe.PixelClock = mode_lib->vba.PixelClock[k];
2138 myPipe.DCFCLKDeepSleep = mode_lib->vba.DCFCLKDeepSleep;
2139 myPipe.DPPPerPlane = mode_lib->vba.DPPPerPlane[k];
2140 myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
2141 myPipe.SourceScan = mode_lib->vba.SourceScan[k];
2142 myPipe.BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
2143 myPipe.BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
2144 myPipe.BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
2145 myPipe.BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
2146 myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
2147 myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
2148 myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
2149 myPipe.HTotal = mode_lib->vba.HTotal[k];
2150
2151
2152 myHostVM.Enable = mode_lib->vba.HostVMEnable;
2153 myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
2154 myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
2155
2156 mode_lib->vba.ErrorResult[k] =
2157 CalculatePrefetchSchedule(
2158 mode_lib,
2159 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2160 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2161 &myPipe,
2162 locals->DSCDelay[k],
2163 mode_lib->vba.DPPCLKDelaySubtotal,
2164 mode_lib->vba.DPPCLKDelaySCL,
2165 mode_lib->vba.DPPCLKDelaySCLLBOnly,
2166 mode_lib->vba.DPPCLKDelayCNVCFormater,
2167 mode_lib->vba.DPPCLKDelayCNVCCursor,
2168 mode_lib->vba.DISPCLKDelaySubtotal,
2169 (unsigned int) (locals->SwathWidthY[k]
2170 / mode_lib->vba.HRatio[k]),
2171 mode_lib->vba.OutputFormat[k],
2172 mode_lib->vba.MaxInterDCNTileRepeaters,
2173 dml_min(mode_lib->vba.VStartupLines, locals->MaxVStartupLines[k]),
2174 locals->MaxVStartupLines[k],
2175 mode_lib->vba.GPUVMMaxPageTableLevels,
2176 mode_lib->vba.GPUVMEnable,
2177 &myHostVM,
2178 mode_lib->vba.DynamicMetadataEnable[k],
2179 mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
2180 mode_lib->vba.DynamicMetadataTransmittedBytes[k],
2181 mode_lib->vba.DCCEnable[k],
2182 mode_lib->vba.UrgentLatency,
2183 mode_lib->vba.UrgentExtraLatency,
2184 mode_lib->vba.TCalc,
2185 locals->PDEAndMetaPTEBytesFrame[k],
2186 locals->MetaRowByte[k],
2187 locals->PixelPTEBytesPerRow[k],
2188 locals->PrefetchSourceLinesY[k],
2189 locals->SwathWidthY[k],
2190 locals->BytePerPixelDETY[k],
2191 locals->VInitPreFillY[k],
2192 locals->MaxNumSwathY[k],
2193 locals->PrefetchSourceLinesC[k],
2194 locals->BytePerPixelDETC[k],
2195 locals->VInitPreFillC[k],
2196 locals->MaxNumSwathC[k],
2197 mode_lib->vba.SwathHeightY[k],
2198 mode_lib->vba.SwathHeightC[k],
2199 TWait,
2200 mode_lib->vba.XFCEnabled[k],
2201 mode_lib->vba.XFCRemoteSurfaceFlipDelay,
2202 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
2203 &locals->DSTXAfterScaler[k],
2204 &locals->DSTYAfterScaler[k],
2205 &locals->DestinationLinesForPrefetch[k],
2206 &locals->PrefetchBandwidth[k],
2207 &locals->DestinationLinesToRequestVMInVBlank[k],
2208 &locals->DestinationLinesToRequestRowInVBlank[k],
2209 &locals->VRatioPrefetchY[k],
2210 &locals->VRatioPrefetchC[k],
2211 &locals->RequiredPrefetchPixDataBWLuma[k],
2212 &locals->RequiredPrefetchPixDataBWChroma[k],
2213 &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
2214 &locals->Tno_bw[k],
2215 &locals->prefetch_vmrow_bw[k],
2216 &locals->swath_width_luma_ub[k],
2217 &locals->swath_width_chroma_ub[k],
2218 &mode_lib->vba.VUpdateOffsetPix[k],
2219 &mode_lib->vba.VUpdateWidthPix[k],
2220 &mode_lib->vba.VReadyOffsetPix[k]);
2221 if (mode_lib->vba.BlendingAndTiming[k] == k) {
2222 locals->VStartup[k] = dml_min(
2223 mode_lib->vba.VStartupLines,
2224 locals->MaxVStartupLines[k]);
2225 if (locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata
2226 != 0) {
2227 locals->VStartup[k] =
2228 locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata;
2229 }
2230 } else {
2231 locals->VStartup[k] =
2232 dml_min(
2233 mode_lib->vba.VStartupLines,
2234 locals->MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]);
2235 }
2236 }
2237
2238 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2239 unsigned int m;
2240
2241 locals->cursor_bw[k] = 0;
2242 locals->cursor_bw_pre[k] = 0;
2243 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
2244 locals->cursor_bw[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
2245 locals->cursor_bw_pre[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPrefetchY[k];
2246 }
2247
2248 CalculateUrgentBurstFactor(
2249 mode_lib->vba.DETBufferSizeInKByte,
2250 mode_lib->vba.SwathHeightY[k],
2251 mode_lib->vba.SwathHeightC[k],
2252 locals->SwathWidthY[k],
2253 mode_lib->vba.HTotal[k] /
2254 mode_lib->vba.PixelClock[k],
2255 mode_lib->vba.UrgentLatency,
2256 mode_lib->vba.CursorBufferSize,
2257 mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
2258 dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
2259 mode_lib->vba.VRatio[k],
2260 locals->VRatioPrefetchY[k],
2261 locals->VRatioPrefetchC[k],
2262 locals->BytePerPixelDETY[k],
2263 locals->BytePerPixelDETC[k],
2264 &locals->UrgentBurstFactorCursor[k],
2265 &locals->UrgentBurstFactorCursorPre[k],
2266 &locals->UrgentBurstFactorLuma[k],
2267 &locals->UrgentBurstFactorLumaPre[k],
2268 &locals->UrgentBurstFactorChroma[k],
2269 &locals->UrgentBurstFactorChromaPre[k],
2270 &locals->NotEnoughUrgentLatencyHiding,
2271 &locals->NotEnoughUrgentLatencyHidingPre);
2272
2273 if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
2274 locals->UrgentBurstFactorLuma[k] = 1;
2275 locals->UrgentBurstFactorChroma[k] = 1;
2276 locals->UrgentBurstFactorCursor[k] = 1;
2277 locals->UrgentBurstFactorLumaPre[k] = 1;
2278 locals->UrgentBurstFactorChromaPre[k] = 1;
2279 locals->UrgentBurstFactorCursorPre[k] = 1;
2280 }
2281
2282 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2283 dml_max3(locals->prefetch_vmrow_bw[k],
2284 locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
2285 + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k]
2286 * locals->UrgentBurstFactorCursor[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
2287 locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixDataBWChroma[k]
2288 * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2289
2290 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2291 dml_max3(locals->prefetch_vmrow_bw[k],
2292 locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k]
2293 + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
2294 locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
2295
2296 if (locals->DestinationLinesForPrefetch[k] < 2)
2297 DestinationLineTimesForPrefetchLessThan2 = true;
2298 if (locals->VRatioPrefetchY[k] > 4 || locals->VRatioPrefetchC[k] > 4)
2299 VRatioPrefetchMoreThan4 = true;
2300 }
2301 mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW;
2302
2303 if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding == 0 && locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2304 && !DestinationLineTimesForPrefetchLessThan2)
2305 mode_lib->vba.PrefetchModeSupported = true;
2306 else {
2307 mode_lib->vba.PrefetchModeSupported = false;
2308 dml_print(
2309 "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2310 }
2311
2312 if (mode_lib->vba.PrefetchModeSupported == true) {
2313 mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW;
2314 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2315 mode_lib->vba.BandwidthAvailableForImmediateFlip =
2316 mode_lib->vba.BandwidthAvailableForImmediateFlip
2317 - dml_max(
2318 locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
2319 + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k]
2320 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
2321 locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] +
2322 locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] +
2323 locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2324 }
2325
2326 mode_lib->vba.TotImmediateFlipBytes = 0;
2327 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2328 mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k] + locals->PixelPTEBytesPerRow[k];
2329 }
2330 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2331 CalculateFlipSchedule(
2332 mode_lib,
2333 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2334 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2335 mode_lib->vba.UrgentExtraLatency,
2336 mode_lib->vba.UrgentLatency,
2337 mode_lib->vba.GPUVMMaxPageTableLevels,
2338 mode_lib->vba.HostVMEnable,
2339 mode_lib->vba.HostVMMaxPageTableLevels,
2340 mode_lib->vba.HostVMCachedPageTableLevels,
2341 mode_lib->vba.GPUVMEnable,
2342 locals->PDEAndMetaPTEBytesFrame[k],
2343 locals->MetaRowByte[k],
2344 locals->PixelPTEBytesPerRow[k],
2345 mode_lib->vba.BandwidthAvailableForImmediateFlip,
2346 mode_lib->vba.TotImmediateFlipBytes,
2347 mode_lib->vba.SourcePixelFormat[k],
2348 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
2349 mode_lib->vba.VRatio[k],
2350 locals->Tno_bw[k],
2351 mode_lib->vba.DCCEnable[k],
2352 locals->dpte_row_height[k],
2353 locals->meta_row_height[k],
2354 locals->dpte_row_height_chroma[k],
2355 locals->meta_row_height_chroma[k],
2356 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
2357 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
2358 &locals->final_flip_bw[k],
2359 &locals->ImmediateFlipSupportedForPipe[k]);
2360 }
2361 mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
2362 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2363 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2364 mode_lib->vba.total_dcn_read_bw_with_flip =
2365 mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
2366 locals->prefetch_vmrow_bw[k],
2367 locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
2368 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
2369 locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k]
2370 + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k]
2371 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2372 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst =
2373 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst +
2374 dml_max3(locals->prefetch_vmrow_bw[k],
2375 locals->final_flip_bw[k] + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k],
2376 locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
2377
2378 }
2379 mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip = mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst / mode_lib->vba.ReturnBW;
2380
2381 mode_lib->vba.ImmediateFlipSupported = true;
2382 if (mode_lib->vba.total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) {
2383 mode_lib->vba.ImmediateFlipSupported = false;
2384 }
2385 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2386 if (locals->ImmediateFlipSupportedForPipe[k] == false) {
2387 mode_lib->vba.ImmediateFlipSupported = false;
2388 }
2389 }
2390 } else {
2391 mode_lib->vba.ImmediateFlipSupported = false;
2392 }
2393
2394 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2395 if (mode_lib->vba.ErrorResult[k]) {
2396 mode_lib->vba.PrefetchModeSupported = false;
2397 dml_print(
2398 "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2399 }
2400 }
2401
2402 mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1;
2403 } while (!((mode_lib->vba.PrefetchModeSupported
2404 && ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable)
2405 || mode_lib->vba.ImmediateFlipSupported))
2406 || locals->MaximumMaxVStartupLines < mode_lib->vba.VStartupLines));
2407
2408 //Watermarks and NB P-State/DRAM Clock Change Support
2409 {
2410 enum clock_change_support DRAMClockChangeSupport; // dummy
2411 CalculateWatermarksAndDRAMSpeedChangeSupport(
2412 mode_lib,
2413 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2414 mode_lib->vba.NumberOfActivePlanes,
2415 mode_lib->vba.MaxLineBufferLines,
2416 mode_lib->vba.LineBufferSize,
2417 mode_lib->vba.DPPOutputBufferPixels,
2418 mode_lib->vba.DETBufferSizeInKByte,
2419 mode_lib->vba.WritebackInterfaceLumaBufferSize,
2420 mode_lib->vba.WritebackInterfaceChromaBufferSize,
2421 mode_lib->vba.DCFCLK,
2422 mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
2423 mode_lib->vba.ReturnBW,
2424 mode_lib->vba.GPUVMEnable,
2425 locals->dpte_group_bytes,
2426 mode_lib->vba.MetaChunkSize,
2427 mode_lib->vba.UrgentLatency,
2428 mode_lib->vba.UrgentExtraLatency,
2429 mode_lib->vba.WritebackLatency,
2430 mode_lib->vba.WritebackChunkSize,
2431 mode_lib->vba.SOCCLK,
2432 mode_lib->vba.DRAMClockChangeLatency,
2433 mode_lib->vba.SRExitTime,
2434 mode_lib->vba.SREnterPlusExitTime,
2435 mode_lib->vba.DCFCLKDeepSleep,
2436 mode_lib->vba.DPPPerPlane,
2437 mode_lib->vba.DCCEnable,
2438 locals->DPPCLK,
2439 locals->SwathWidthSingleDPPY,
2440 mode_lib->vba.SwathHeightY,
2441 locals->ReadBandwidthPlaneLuma,
2442 mode_lib->vba.SwathHeightC,
2443 locals->ReadBandwidthPlaneChroma,
2444 mode_lib->vba.LBBitPerPixel,
2445 locals->SwathWidthY,
2446 mode_lib->vba.HRatio,
2447 mode_lib->vba.vtaps,
2448 mode_lib->vba.VTAPsChroma,
2449 mode_lib->vba.VRatio,
2450 mode_lib->vba.HTotal,
2451 mode_lib->vba.PixelClock,
2452 mode_lib->vba.BlendingAndTiming,
2453 locals->BytePerPixelDETY,
2454 locals->BytePerPixelDETC,
2455 mode_lib->vba.WritebackEnable,
2456 mode_lib->vba.WritebackPixelFormat,
2457 mode_lib->vba.WritebackDestinationWidth,
2458 mode_lib->vba.WritebackDestinationHeight,
2459 mode_lib->vba.WritebackSourceHeight,
2460 &DRAMClockChangeSupport,
2461 &mode_lib->vba.UrgentWatermark,
2462 &mode_lib->vba.WritebackUrgentWatermark,
2463 &mode_lib->vba.DRAMClockChangeWatermark,
2464 &mode_lib->vba.WritebackDRAMClockChangeWatermark,
2465 &mode_lib->vba.StutterExitWatermark,
2466 &mode_lib->vba.StutterEnterPlusExitWatermark,
2467 &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
2468 }
2469
2470
2471 //Display Pipeline Delivery Time in Prefetch, Groups
2472 CalculatePixelDeliveryTimes(
2473 mode_lib->vba.NumberOfActivePlanes,
2474 mode_lib->vba.VRatio,
2475 locals->VRatioPrefetchY,
2476 locals->VRatioPrefetchC,
2477 locals->swath_width_luma_ub,
2478 locals->swath_width_chroma_ub,
2479 mode_lib->vba.DPPPerPlane,
2480 mode_lib->vba.HRatio,
2481 mode_lib->vba.PixelClock,
2482 locals->PSCL_THROUGHPUT_LUMA,
2483 locals->PSCL_THROUGHPUT_CHROMA,
2484 locals->DPPCLK,
2485 locals->BytePerPixelDETC,
2486 mode_lib->vba.SourceScan,
2487 locals->BlockWidth256BytesY,
2488 locals->BlockHeight256BytesY,
2489 locals->BlockWidth256BytesC,
2490 locals->BlockHeight256BytesC,
2491 locals->DisplayPipeLineDeliveryTimeLuma,
2492 locals->DisplayPipeLineDeliveryTimeChroma,
2493 locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
2494 locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
2495 locals->DisplayPipeRequestDeliveryTimeLuma,
2496 locals->DisplayPipeRequestDeliveryTimeChroma,
2497 locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2498 locals->DisplayPipeRequestDeliveryTimeChromaPrefetch);
2499
2500 CalculateMetaAndPTETimes(
2501 mode_lib->vba.NumberOfActivePlanes,
2502 mode_lib->vba.GPUVMEnable,
2503 mode_lib->vba.MetaChunkSize,
2504 mode_lib->vba.MinMetaChunkSizeBytes,
2505 mode_lib->vba.GPUVMMaxPageTableLevels,
2506 mode_lib->vba.HTotal,
2507 mode_lib->vba.VRatio,
2508 locals->VRatioPrefetchY,
2509 locals->VRatioPrefetchC,
2510 locals->DestinationLinesToRequestRowInVBlank,
2511 locals->DestinationLinesToRequestRowInImmediateFlip,
2512 locals->DestinationLinesToRequestVMInVBlank,
2513 locals->DestinationLinesToRequestVMInImmediateFlip,
2514 mode_lib->vba.DCCEnable,
2515 mode_lib->vba.PixelClock,
2516 locals->BytePerPixelDETY,
2517 locals->BytePerPixelDETC,
2518 mode_lib->vba.SourceScan,
2519 locals->dpte_row_height,
2520 locals->dpte_row_height_chroma,
2521 locals->meta_row_width,
2522 locals->meta_row_height,
2523 locals->meta_req_width,
2524 locals->meta_req_height,
2525 locals->dpte_group_bytes,
2526 locals->PTERequestSizeY,
2527 locals->PTERequestSizeC,
2528 locals->PixelPTEReqWidthY,
2529 locals->PixelPTEReqHeightY,
2530 locals->PixelPTEReqWidthC,
2531 locals->PixelPTEReqHeightC,
2532 locals->dpte_row_width_luma_ub,
2533 locals->dpte_row_width_chroma_ub,
2534 locals->vm_group_bytes,
2535 locals->dpde0_bytes_per_frame_ub_l,
2536 locals->dpde0_bytes_per_frame_ub_c,
2537 locals->meta_pte_bytes_per_frame_ub_l,
2538 locals->meta_pte_bytes_per_frame_ub_c,
2539 locals->DST_Y_PER_PTE_ROW_NOM_L,
2540 locals->DST_Y_PER_PTE_ROW_NOM_C,
2541 locals->DST_Y_PER_META_ROW_NOM_L,
2542 locals->TimePerMetaChunkNominal,
2543 locals->TimePerMetaChunkVBlank,
2544 locals->TimePerMetaChunkFlip,
2545 locals->time_per_pte_group_nom_luma,
2546 locals->time_per_pte_group_vblank_luma,
2547 locals->time_per_pte_group_flip_luma,
2548 locals->time_per_pte_group_nom_chroma,
2549 locals->time_per_pte_group_vblank_chroma,
2550 locals->time_per_pte_group_flip_chroma,
2551 locals->TimePerVMGroupVBlank,
2552 locals->TimePerVMGroupFlip,
2553 locals->TimePerVMRequestVBlank,
2554 locals->TimePerVMRequestFlip);
2555
2556
2557 // Min TTUVBlank
2558 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2559 if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
2560 locals->AllowDRAMClockChangeDuringVBlank[k] = true;
2561 locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2562 locals->MinTTUVBlank[k] = dml_max(
2563 mode_lib->vba.DRAMClockChangeWatermark,
2564 dml_max(
2565 mode_lib->vba.StutterEnterPlusExitWatermark,
2566 mode_lib->vba.UrgentWatermark));
2567 } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) {
2568 locals->AllowDRAMClockChangeDuringVBlank[k] = false;
2569 locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2570 locals->MinTTUVBlank[k] = dml_max(
2571 mode_lib->vba.StutterEnterPlusExitWatermark,
2572 mode_lib->vba.UrgentWatermark);
2573 } else {
2574 locals->AllowDRAMClockChangeDuringVBlank[k] = false;
2575 locals->AllowDRAMSelfRefreshDuringVBlank[k] = false;
2576 locals->MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark;
2577 }
2578 if (!mode_lib->vba.DynamicMetadataEnable[k])
2579 locals->MinTTUVBlank[k] = mode_lib->vba.TCalc
2580 + locals->MinTTUVBlank[k];
2581 }
2582
2583 // DCC Configuration
2584 mode_lib->vba.ActiveDPPs = 0;
2585 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2586 locals->MaximumDCCCompressionYSurface[k] = CalculateDCCConfiguration(
2587 mode_lib->vba.DCCEnable[k],
2588 false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
2589 mode_lib->vba.ViewportWidth[k],
2590 mode_lib->vba.ViewportHeight[k],
2591 mode_lib->vba.DETBufferSizeInKByte * 1024,
2592 locals->BlockHeight256BytesY[k],
2593 mode_lib->vba.SwathHeightY[k],
2594 mode_lib->vba.SurfaceTiling[k],
2595 locals->BytePerPixelDETY[k],
2596 mode_lib->vba.SourceScan[k],
2597 &locals->DCCYMaxUncompressedBlock[k],
2598 &locals->DCCYMaxCompressedBlock[k],
2599 &locals->DCCYIndependent64ByteBlock[k]);
2600 }
2601
2602 //XFC Parameters:
2603 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2604 if (mode_lib->vba.XFCEnabled[k] == true) {
2605 double TWait;
2606
2607 locals->XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset;
2608 locals->XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth;
2609 locals->XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset;
2610 TWait = CalculateTWait(
2611 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2612 mode_lib->vba.DRAMClockChangeLatency,
2613 mode_lib->vba.UrgentLatency,
2614 mode_lib->vba.SREnterPlusExitTime);
2615 mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay(
2616 mode_lib,
2617 mode_lib->vba.VRatio[k],
2618 locals->SwathWidthY[k],
2619 dml_ceil(locals->BytePerPixelDETY[k], 1),
2620 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
2621 mode_lib->vba.XFCTSlvVupdateOffset,
2622 mode_lib->vba.XFCTSlvVupdateWidth,
2623 mode_lib->vba.XFCTSlvVreadyOffset,
2624 mode_lib->vba.XFCXBUFLatencyTolerance,
2625 mode_lib->vba.XFCFillBWOverhead,
2626 mode_lib->vba.XFCSlvChunkSize,
2627 mode_lib->vba.XFCBusTransportTime,
2628 mode_lib->vba.TCalc,
2629 TWait,
2630 &mode_lib->vba.SrcActiveDrainRate,
2631 &mode_lib->vba.TInitXFill,
2632 &mode_lib->vba.TslvChk);
2633 locals->XFCRemoteSurfaceFlipLatency[k] =
2634 dml_floor(
2635 mode_lib->vba.XFCRemoteSurfaceFlipDelay
2636 / (mode_lib->vba.HTotal[k]
2637 / mode_lib->vba.PixelClock[k]),
2638 1);
2639 locals->XFCTransferDelay[k] =
2640 dml_ceil(
2641 mode_lib->vba.XFCBusTransportTime
2642 / (mode_lib->vba.HTotal[k]
2643 / mode_lib->vba.PixelClock[k]),
2644 1);
2645 locals->XFCPrechargeDelay[k] =
2646 dml_ceil(
2647 (mode_lib->vba.XFCBusTransportTime
2648 + mode_lib->vba.TInitXFill
2649 + mode_lib->vba.TslvChk)
2650 / (mode_lib->vba.HTotal[k]
2651 / mode_lib->vba.PixelClock[k]),
2652 1);
2653 mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance
2654 * mode_lib->vba.SrcActiveDrainRate;
2655 mode_lib->vba.FinalFillMargin =
2656 (locals->DestinationLinesToRequestVMInVBlank[k]
2657 + locals->DestinationLinesToRequestRowInVBlank[k])
2658 * mode_lib->vba.HTotal[k]
2659 / mode_lib->vba.PixelClock[k]
2660 * mode_lib->vba.SrcActiveDrainRate
2661 + mode_lib->vba.XFCFillConstant;
2662 mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay
2663 * mode_lib->vba.SrcActiveDrainRate
2664 + mode_lib->vba.FinalFillMargin;
2665 mode_lib->vba.RemainingFillLevel = dml_max(
2666 0.0,
2667 mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel);
2668 mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel
2669 / (mode_lib->vba.SrcActiveDrainRate
2670 * mode_lib->vba.XFCFillBWOverhead / 100);
2671 locals->XFCPrefetchMargin[k] =
2672 mode_lib->vba.XFCRemoteSurfaceFlipDelay
2673 + mode_lib->vba.TFinalxFill
2674 + (locals->DestinationLinesToRequestVMInVBlank[k]
2675 + locals->DestinationLinesToRequestRowInVBlank[k])
2676 * mode_lib->vba.HTotal[k]
2677 / mode_lib->vba.PixelClock[k];
2678 } else {
2679 locals->XFCSlaveVUpdateOffset[k] = 0;
2680 locals->XFCSlaveVupdateWidth[k] = 0;
2681 locals->XFCSlaveVReadyOffset[k] = 0;
2682 locals->XFCRemoteSurfaceFlipLatency[k] = 0;
2683 locals->XFCPrechargeDelay[k] = 0;
2684 locals->XFCTransferDelay[k] = 0;
2685 locals->XFCPrefetchMargin[k] = 0;
2686 }
2687 }
2688
2689 // Stutter Efficiency
2690 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2691 CalculateDETBufferSize(
2692 mode_lib->vba.DETBufferSizeInKByte,
2693 mode_lib->vba.SwathHeightY[k],
2694 mode_lib->vba.SwathHeightC[k],
2695 &locals->DETBufferSizeY[k],
2696 &locals->DETBufferSizeC[k]);
2697
2698 locals->LinesInDETY[k] = locals->DETBufferSizeY[k]
2699 / locals->BytePerPixelDETY[k] / locals->SwathWidthY[k];
2700 locals->LinesInDETYRoundedDownToSwath[k] = dml_floor(
2701 locals->LinesInDETY[k],
2702 mode_lib->vba.SwathHeightY[k]);
2703 locals->FullDETBufferingTimeY[k] =
2704 locals->LinesInDETYRoundedDownToSwath[k]
2705 * (mode_lib->vba.HTotal[k]
2706 / mode_lib->vba.PixelClock[k])
2707 / mode_lib->vba.VRatio[k];
2708 }
2709
2710 mode_lib->vba.StutterPeriod = 999999.0;
2711 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2712 if (locals->FullDETBufferingTimeY[k] < mode_lib->vba.StutterPeriod) {
2713 mode_lib->vba.StutterPeriod = locals->FullDETBufferingTimeY[k];
2714 mode_lib->vba.FrameTimeForMinFullDETBufferingTime =
2715 (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k]
2716 / mode_lib->vba.PixelClock[k];
2717 locals->BytePerPixelYCriticalPlane = dml_ceil(locals->BytePerPixelDETY[k], 1);
2718 locals->SwathWidthYCriticalPlane = locals->SwathWidthY[k];
2719 locals->LinesToFinishSwathTransferStutterCriticalPlane =
2720 mode_lib->vba.SwathHeightY[k] - (locals->LinesInDETY[k] - locals->LinesInDETYRoundedDownToSwath[k]);
2721 }
2722 }
2723
2724 mode_lib->vba.AverageReadBandwidth = 0.0;
2725 mode_lib->vba.TotalRowReadBandwidth = 0.0;
2726 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2727 unsigned int DCCRateLimit;
2728
2729 if (mode_lib->vba.DCCEnable[k]) {
2730 if (locals->DCCYMaxCompressedBlock[k] == 256)
2731 DCCRateLimit = 4;
2732 else
2733 DCCRateLimit = 2;
2734
2735 mode_lib->vba.AverageReadBandwidth =
2736 mode_lib->vba.AverageReadBandwidth
2737 + (locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k]) /
2738 dml_min(mode_lib->vba.DCCRate[k], DCCRateLimit);
2739 } else {
2740 mode_lib->vba.AverageReadBandwidth =
2741 mode_lib->vba.AverageReadBandwidth
2742 + locals->ReadBandwidthPlaneLuma[k]
2743 + locals->ReadBandwidthPlaneChroma[k];
2744 }
2745 mode_lib->vba.TotalRowReadBandwidth = mode_lib->vba.TotalRowReadBandwidth +
2746 locals->meta_row_bw[k] + locals->dpte_row_bw[k];
2747 }
2748
2749 mode_lib->vba.AverageDCCCompressionRate = mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.AverageReadBandwidth;
2750
2751 mode_lib->vba.PartOfBurstThatFitsInROB =
2752 dml_min(
2753 mode_lib->vba.StutterPeriod
2754 * mode_lib->vba.TotalDataReadBandwidth,
2755 mode_lib->vba.ROBBufferSizeInKByte * 1024
2756 * mode_lib->vba.AverageDCCCompressionRate);
2757 mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB
2758 / mode_lib->vba.AverageDCCCompressionRate / mode_lib->vba.ReturnBW
2759 + (mode_lib->vba.StutterPeriod * mode_lib->vba.TotalDataReadBandwidth
2760 - mode_lib->vba.PartOfBurstThatFitsInROB)
2761 / (mode_lib->vba.DCFCLK * 64)
2762 + mode_lib->vba.StutterPeriod * mode_lib->vba.TotalRowReadBandwidth / mode_lib->vba.ReturnBW;
2763 mode_lib->vba.StutterBurstTime = dml_max(
2764 mode_lib->vba.StutterBurstTime,
2765 (locals->LinesToFinishSwathTransferStutterCriticalPlane * locals->BytePerPixelYCriticalPlane *
2766 locals->SwathWidthYCriticalPlane / mode_lib->vba.ReturnBW)
2767 );
2768
2769 mode_lib->vba.TotalActiveWriteback = 0;
2770 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2771 if (mode_lib->vba.WritebackEnable[k] == true) {
2772 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
2773 }
2774 }
2775
2776 if (mode_lib->vba.TotalActiveWriteback == 0) {
2777 mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1
2778 - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime)
2779 / mode_lib->vba.StutterPeriod) * 100;
2780 } else {
2781 mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0;
2782 }
2783
2784 mode_lib->vba.SmallestVBlank = 999999;
2785 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2786 if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
2787 mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k]
2788 - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k]
2789 / mode_lib->vba.PixelClock[k];
2790 } else {
2791 mode_lib->vba.VBlankTime = 0;
2792 }
2793 mode_lib->vba.SmallestVBlank = dml_min(
2794 mode_lib->vba.SmallestVBlank,
2795 mode_lib->vba.VBlankTime);
2796 }
2797
2798 mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100
2799 * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime
2800 - mode_lib->vba.SmallestVBlank)
2801 + mode_lib->vba.SmallestVBlank)
2802 / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100;
2803 }
2804
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)2805 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
2806 {
2807 // Display Pipe Configuration
2808 double BytePerPixDETY;
2809 double BytePerPixDETC;
2810 double Read256BytesBlockHeightY;
2811 double Read256BytesBlockHeightC;
2812 double Read256BytesBlockWidthY;
2813 double Read256BytesBlockWidthC;
2814 double MaximumSwathHeightY;
2815 double MaximumSwathHeightC;
2816 double MinimumSwathHeightY;
2817 double MinimumSwathHeightC;
2818 double SwathWidth;
2819 double SwathWidthGranularityY;
2820 double SwathWidthGranularityC;
2821 double RoundedUpMaxSwathSizeBytesY;
2822 double RoundedUpMaxSwathSizeBytesC;
2823 unsigned int j, k;
2824
2825 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2826 bool MainPlaneDoesODMCombine = false;
2827
2828 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
2829 BytePerPixDETY = 8;
2830 BytePerPixDETC = 0;
2831 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
2832 BytePerPixDETY = 4;
2833 BytePerPixDETC = 0;
2834 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
2835 BytePerPixDETY = 2;
2836 BytePerPixDETC = 0;
2837 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) {
2838 BytePerPixDETY = 1;
2839 BytePerPixDETC = 0;
2840 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
2841 BytePerPixDETY = 1;
2842 BytePerPixDETC = 2;
2843 } else {
2844 BytePerPixDETY = 4.0 / 3.0;
2845 BytePerPixDETC = 8.0 / 3.0;
2846 }
2847
2848 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2849 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2850 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
2851 || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
2852 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2853 Read256BytesBlockHeightY = 1;
2854 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
2855 Read256BytesBlockHeightY = 4;
2856 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2857 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
2858 Read256BytesBlockHeightY = 8;
2859 } else {
2860 Read256BytesBlockHeightY = 16;
2861 }
2862 Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
2863 / Read256BytesBlockHeightY;
2864 Read256BytesBlockHeightC = 0;
2865 Read256BytesBlockWidthC = 0;
2866 } else {
2867 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2868 Read256BytesBlockHeightY = 1;
2869 Read256BytesBlockHeightC = 1;
2870 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
2871 Read256BytesBlockHeightY = 16;
2872 Read256BytesBlockHeightC = 8;
2873 } else {
2874 Read256BytesBlockHeightY = 8;
2875 Read256BytesBlockHeightC = 8;
2876 }
2877 Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
2878 / Read256BytesBlockHeightY;
2879 Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2)
2880 / Read256BytesBlockHeightC;
2881 }
2882
2883 if (mode_lib->vba.SourceScan[k] == dm_horz) {
2884 MaximumSwathHeightY = Read256BytesBlockHeightY;
2885 MaximumSwathHeightC = Read256BytesBlockHeightC;
2886 } else {
2887 MaximumSwathHeightY = Read256BytesBlockWidthY;
2888 MaximumSwathHeightC = Read256BytesBlockWidthC;
2889 }
2890
2891 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2892 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2893 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
2894 || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
2895 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
2896 || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2897 && (mode_lib->vba.SurfaceTiling[k]
2898 == dm_sw_4kb_s
2899 || mode_lib->vba.SurfaceTiling[k]
2900 == dm_sw_4kb_s_x
2901 || mode_lib->vba.SurfaceTiling[k]
2902 == dm_sw_64kb_s
2903 || mode_lib->vba.SurfaceTiling[k]
2904 == dm_sw_64kb_s_t
2905 || mode_lib->vba.SurfaceTiling[k]
2906 == dm_sw_64kb_s_x
2907 || mode_lib->vba.SurfaceTiling[k]
2908 == dm_sw_var_s
2909 || mode_lib->vba.SurfaceTiling[k]
2910 == dm_sw_var_s_x)
2911 && mode_lib->vba.SourceScan[k] == dm_horz)) {
2912 MinimumSwathHeightY = MaximumSwathHeightY;
2913 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8
2914 && mode_lib->vba.SourceScan[k] != dm_horz) {
2915 MinimumSwathHeightY = MaximumSwathHeightY;
2916 } else {
2917 MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
2918 }
2919 MinimumSwathHeightC = MaximumSwathHeightC;
2920 } else {
2921 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2922 MinimumSwathHeightY = MaximumSwathHeightY;
2923 MinimumSwathHeightC = MaximumSwathHeightC;
2924 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
2925 && mode_lib->vba.SourceScan[k] == dm_horz) {
2926 MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
2927 MinimumSwathHeightC = MaximumSwathHeightC;
2928 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
2929 && mode_lib->vba.SourceScan[k] == dm_horz) {
2930 MinimumSwathHeightC = MaximumSwathHeightC / 2.0;
2931 MinimumSwathHeightY = MaximumSwathHeightY;
2932 } else {
2933 MinimumSwathHeightY = MaximumSwathHeightY;
2934 MinimumSwathHeightC = MaximumSwathHeightC;
2935 }
2936 }
2937
2938 if (mode_lib->vba.SourceScan[k] == dm_horz) {
2939 SwathWidth = mode_lib->vba.ViewportWidth[k];
2940 } else {
2941 SwathWidth = mode_lib->vba.ViewportHeight[k];
2942 }
2943
2944 if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2945 MainPlaneDoesODMCombine = true;
2946 }
2947 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
2948 if (mode_lib->vba.BlendingAndTiming[k] == j
2949 && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2950 MainPlaneDoesODMCombine = true;
2951 }
2952 }
2953
2954 if (MainPlaneDoesODMCombine == true) {
2955 SwathWidth = dml_min(
2956 SwathWidth,
2957 mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]);
2958 } else {
2959 SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k];
2960 }
2961
2962 SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY;
2963 RoundedUpMaxSwathSizeBytesY = (dml_ceil(
2964 (double) (SwathWidth - 1),
2965 SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY
2966 * MaximumSwathHeightY;
2967 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
2968 RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256)
2969 + 256;
2970 }
2971 if (MaximumSwathHeightC > 0) {
2972 SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2)
2973 / MaximumSwathHeightC;
2974 RoundedUpMaxSwathSizeBytesC = (dml_ceil(
2975 (double) (SwathWidth / 2.0 - 1),
2976 SwathWidthGranularityC) + SwathWidthGranularityC)
2977 * BytePerPixDETC * MaximumSwathHeightC;
2978 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
2979 RoundedUpMaxSwathSizeBytesC = dml_ceil(
2980 RoundedUpMaxSwathSizeBytesC,
2981 256) + 256;
2982 }
2983 } else
2984 RoundedUpMaxSwathSizeBytesC = 0.0;
2985
2986 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
2987 <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
2988 mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY;
2989 mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC;
2990 } else {
2991 mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY;
2992 mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC;
2993 }
2994
2995 CalculateDETBufferSize(
2996 mode_lib->vba.DETBufferSizeInKByte,
2997 mode_lib->vba.SwathHeightY[k],
2998 mode_lib->vba.SwathHeightC[k],
2999 &mode_lib->vba.DETBufferSizeY[k],
3000 &mode_lib->vba.DETBufferSizeC[k]);
3001 }
3002 }
3003
/*
 * CalculateTWait - select the wait time for a given prefetch mode.
 *
 * PrefetchMode 0:     the larger of (DRAMClockChangeLatency + UrgentLatency)
 *                     and dml_max(SREnterPlusExitTime, UrgentLatency).
 * PrefetchMode 1:     dml_max(SREnterPlusExitTime, UrgentLatency).
 * Any other mode:     UrgentLatency.
 *
 * Returns the selected value; no state is modified.
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(
				DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3020
/*
 * CalculateRemoteSurfaceFlipDelay - compute the XFC remote surface flip delay.
 *
 * Outputs written through pointers:
 *   *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime
 *   *TInitXFill         = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100)
 *   *TslvChk            = XFCSlvChunkSize / (*SrcActiveDrainRate * (1 + XFCFillBWOverhead / 100))
 *
 * Returns 2 * XFCBusTransportTime + (XFCTSlvVupdateOffset + XFCTSlvVupdateWidth
 * + XFCTSlvVreadyOffset) + TCalc + TWait + *TslvChk + *TInitXFill.
 *
 * Every intermediate value and the result are reported through dml_print().
 * mode_lib is not referenced directly in this function body.
 */
static double CalculateRemoteSurfaceFlipDelay(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double SwathWidth,
		double Bpp,
		double LineTime,
		double XFCTSlvVupdateOffset,
		double XFCTSlvVupdateWidth,
		double XFCTSlvVreadyOffset,
		double XFCXBUFLatencyTolerance,
		double XFCFillBWOverhead,
		double XFCSlvChunkSize,
		double XFCBusTransportTime,
		double TCalc,
		double TWait,
		double *SrcActiveDrainRate,
		double *TInitXFill,
		double *TslvChk)
{
	// XFCFillBWOverhead is used as a percentage on top of the drain rate
	const double FillOverheadScale = 1 + XFCFillBWOverhead / 100;
	const double SlaveSetupTime = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth
			+ XFCTSlvVreadyOffset;
	double AverageFillRate;
	double FlipDelay;

	*SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime;
	*TInitXFill = XFCXBUFLatencyTolerance / FillOverheadScale;
	AverageFillRate = *SrcActiveDrainRate * FillOverheadScale;
	*TslvChk = XFCSlvChunkSize / AverageFillRate;

	dml_print(
			"DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n",
			*SrcActiveDrainRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", SlaveSetupTime);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AverageFillRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk);

	// TODO: This doesn't seem to match programming guide
	FlipDelay = 2 * XFCBusTransportTime + SlaveSetupTime + TCalc + TWait + *TslvChk
			+ *TInitXFill;
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", FlipDelay);
	return FlipDelay;
}
3058
CalculateWriteBackDelay(enum source_format_class WritebackPixelFormat,double WritebackHRatio,double WritebackVRatio,unsigned int WritebackLumaHTaps,unsigned int WritebackLumaVTaps,unsigned int WritebackChromaHTaps,unsigned int WritebackChromaVTaps,unsigned int WritebackDestinationWidth)3059 static double CalculateWriteBackDelay(
3060 enum source_format_class WritebackPixelFormat,
3061 double WritebackHRatio,
3062 double WritebackVRatio,
3063 unsigned int WritebackLumaHTaps,
3064 unsigned int WritebackLumaVTaps,
3065 unsigned int WritebackChromaHTaps,
3066 unsigned int WritebackChromaVTaps,
3067 unsigned int WritebackDestinationWidth)
3068 {
3069 double CalculateWriteBackDelay =
3070 dml_max(
3071 dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio,
3072 WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1)
3073 * dml_ceil(
3074 WritebackDestinationWidth
3075 / 4.0,
3076 1)
3077 + dml_ceil(1.0 / WritebackVRatio, 1)
3078 * (dml_ceil(
3079 WritebackLumaVTaps
3080 / 4.0,
3081 1) + 4));
3082
3083 if (WritebackPixelFormat != dm_444_32) {
3084 CalculateWriteBackDelay =
3085 dml_max(
3086 CalculateWriteBackDelay,
3087 dml_max(
3088 dml_ceil(
3089 WritebackChromaHTaps
3090 / 2.0,
3091 1)
3092 / (2
3093 * WritebackHRatio),
3094 WritebackChromaVTaps
3095 * dml_ceil(
3096 1
3097 / (2
3098 * WritebackVRatio),
3099 1)
3100 * dml_ceil(
3101 WritebackDestinationWidth
3102 / 2.0
3103 / 2.0,
3104 1)
3105 + dml_ceil(
3106 1
3107 / (2
3108 * WritebackVRatio),
3109 1)
3110 * (dml_ceil(
3111 WritebackChromaVTaps
3112 / 4.0,
3113 1)
3114 + 4)));
3115 }
3116 return CalculateWriteBackDelay;
3117 }
3118
CalculateActiveRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3119 static void CalculateActiveRowBandwidth(
3120 bool GPUVMEnable,
3121 enum source_format_class SourcePixelFormat,
3122 double VRatio,
3123 bool DCCEnable,
3124 double LineTime,
3125 unsigned int MetaRowByteLuma,
3126 unsigned int MetaRowByteChroma,
3127 unsigned int meta_row_height_luma,
3128 unsigned int meta_row_height_chroma,
3129 unsigned int PixelPTEBytesPerRowLuma,
3130 unsigned int PixelPTEBytesPerRowChroma,
3131 unsigned int dpte_row_height_luma,
3132 unsigned int dpte_row_height_chroma,
3133 double *meta_row_bw,
3134 double *dpte_row_bw)
3135 {
3136 if (DCCEnable != true) {
3137 *meta_row_bw = 0;
3138 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3139 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3140 + VRatio / 2 * MetaRowByteChroma
3141 / (meta_row_height_chroma * LineTime);
3142 } else {
3143 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3144 }
3145
3146 if (GPUVMEnable != true) {
3147 *dpte_row_bw = 0;
3148 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3149 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3150 + VRatio / 2 * PixelPTEBytesPerRowChroma
3151 / (dpte_row_height_chroma * LineTime);
3152 } else {
3153 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3154 }
3155 }
3156
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxPageTableLevels,unsigned int HostVMCachedPageTableLevels,bool GPUVMEnable,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3157 static void CalculateFlipSchedule(
3158 struct display_mode_lib *mode_lib,
3159 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3160 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3161 double UrgentExtraLatency,
3162 double UrgentLatency,
3163 unsigned int GPUVMMaxPageTableLevels,
3164 bool HostVMEnable,
3165 unsigned int HostVMMaxPageTableLevels,
3166 unsigned int HostVMCachedPageTableLevels,
3167 bool GPUVMEnable,
3168 double PDEAndMetaPTEBytesPerFrame,
3169 double MetaRowBytes,
3170 double DPTEBytesPerRow,
3171 double BandwidthAvailableForImmediateFlip,
3172 unsigned int TotImmediateFlipBytes,
3173 enum source_format_class SourcePixelFormat,
3174 double LineTime,
3175 double VRatio,
3176 double Tno_bw,
3177 bool DCCEnable,
3178 unsigned int dpte_row_height,
3179 unsigned int meta_row_height,
3180 unsigned int dpte_row_height_chroma,
3181 unsigned int meta_row_height_chroma,
3182 double *DestinationLinesToRequestVMInImmediateFlip,
3183 double *DestinationLinesToRequestRowInImmediateFlip,
3184 double *final_flip_bw,
3185 bool *ImmediateFlipSupportedForPipe)
3186 {
3187 double min_row_time = 0.0;
3188 unsigned int HostVMDynamicLevels;
3189 double TimeForFetchingMetaPTEImmediateFlip;
3190 double TimeForFetchingRowInVBlankImmediateFlip;
3191 double ImmediateFlipBW;
3192 double HostVMInefficiencyFactor;
3193 double VRatioClamped;
3194
3195 if (GPUVMEnable == true && HostVMEnable == true) {
3196 HostVMInefficiencyFactor =
3197 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
3198 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3199 HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
3200 } else {
3201 HostVMInefficiencyFactor = 1;
3202 HostVMDynamicLevels = 0;
3203 }
3204
3205 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow)
3206 * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3207
3208 if (GPUVMEnable == true) {
3209 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3210 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3211 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevels + 1) - 1),
3212 LineTime / 4.0);
3213 } else {
3214 TimeForFetchingMetaPTEImmediateFlip = 0;
3215 }
3216
3217 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3218 if ((GPUVMEnable == true || DCCEnable == true)) {
3219 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevels + 1), LineTime / 4);
3220 } else {
3221 TimeForFetchingRowInVBlankImmediateFlip = 0;
3222 }
3223
3224 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3225 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), (MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3226 VRatioClamped = (VRatio < 1.0) ? 1.0 : VRatio;
3227 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3228 if (GPUVMEnable == true && DCCEnable != true) {
3229 min_row_time = dml_min(
3230 dpte_row_height * LineTime / VRatioClamped,
3231 dpte_row_height_chroma * LineTime / (VRatioClamped / 2));
3232 } else if (GPUVMEnable != true && DCCEnable == true) {
3233 min_row_time = dml_min(
3234 meta_row_height * LineTime / VRatioClamped,
3235 meta_row_height_chroma * LineTime / (VRatioClamped / 2));
3236 } else {
3237 min_row_time = dml_min4(
3238 dpte_row_height * LineTime / VRatioClamped,
3239 meta_row_height * LineTime / VRatioClamped,
3240 dpte_row_height_chroma * LineTime / (VRatioClamped / 2),
3241 meta_row_height_chroma * LineTime / (VRatioClamped / 2));
3242 }
3243 } else {
3244 if (GPUVMEnable == true && DCCEnable != true) {
3245 min_row_time = dpte_row_height * LineTime / VRatioClamped;
3246 } else if (GPUVMEnable != true && DCCEnable == true) {
3247 min_row_time = meta_row_height * LineTime / VRatioClamped;
3248 } else {
3249 min_row_time = dml_min(
3250 dpte_row_height * LineTime / VRatioClamped,
3251 meta_row_height * LineTime / VRatioClamped);
3252 }
3253 }
3254
3255 if (*DestinationLinesToRequestVMInImmediateFlip >= 32
3256 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3257 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3258 *ImmediateFlipSupportedForPipe = false;
3259 } else {
3260 *ImmediateFlipSupportedForPipe = true;
3261 }
3262 }
3263
TruncToValidBPP(double DecimalBPP,double DesiredBPP,bool DSCEnabled,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent)3264 static unsigned int TruncToValidBPP(
3265 double DecimalBPP,
3266 double DesiredBPP,
3267 bool DSCEnabled,
3268 enum output_encoder_class Output,
3269 enum output_format_class Format,
3270 unsigned int DSCInputBitPerComponent)
3271 {
3272 if (Output == dm_hdmi) {
3273 if (Format == dm_420) {
3274 if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3275 return 18;
3276 else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
3277 return 15;
3278 else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
3279 return 12;
3280 else
3281 return BPP_INVALID;
3282 } else if (Format == dm_444) {
3283 if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
3284 return 36;
3285 else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
3286 return 30;
3287 else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3288 return 24;
3289 else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3290 return 18;
3291 else
3292 return BPP_INVALID;
3293 } else {
3294 if (DecimalBPP / 1.5 >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3295 return 24;
3296 else if (DecimalBPP / 1.5 >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
3297 return 20;
3298 else if (DecimalBPP / 1.5 >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
3299 return 16;
3300 else
3301 return BPP_INVALID;
3302 }
3303 } else {
3304 if (DSCEnabled) {
3305 if (Format == dm_420) {
3306 if (DesiredBPP == 0) {
3307 if (DecimalBPP < 6)
3308 return BPP_INVALID;
3309 else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1.0 / 16.0)
3310 return 1.5 * DSCInputBitPerComponent - 1.0 / 16.0;
3311 else
3312 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3313 } else {
3314 if (DecimalBPP < 6
3315 || DesiredBPP < 6
3316 || DesiredBPP > 1.5 * DSCInputBitPerComponent - 1.0 / 16.0
3317 || DecimalBPP < DesiredBPP) {
3318 return BPP_INVALID;
3319 } else {
3320 return DesiredBPP;
3321 }
3322 }
3323 } else if (Format == dm_n422) {
3324 if (DesiredBPP == 0) {
3325 if (DecimalBPP < 7)
3326 return BPP_INVALID;
3327 else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1.0 / 16.0)
3328 return 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3329 else
3330 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3331 } else {
3332 if (DecimalBPP < 7
3333 || DesiredBPP < 7
3334 || DesiredBPP > 2 * DSCInputBitPerComponent - 1.0 / 16.0
3335 || DecimalBPP < DesiredBPP) {
3336 return BPP_INVALID;
3337 } else {
3338 return DesiredBPP;
3339 }
3340 }
3341 } else {
3342 if (DesiredBPP == 0) {
3343 if (DecimalBPP < 8)
3344 return BPP_INVALID;
3345 else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1.0 / 16.0)
3346 return 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3347 else
3348 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3349 } else {
3350 if (DecimalBPP < 8
3351 || DesiredBPP < 8
3352 || DesiredBPP > 3 * DSCInputBitPerComponent - 1.0 / 16.0
3353 || DecimalBPP < DesiredBPP) {
3354 return BPP_INVALID;
3355 } else {
3356 return DesiredBPP;
3357 }
3358 }
3359 }
3360 } else if (Format == dm_420) {
3361 if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3362 return 18;
3363 else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
3364 return 15;
3365 else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
3366 return 12;
3367 else
3368 return BPP_INVALID;
3369 } else if (Format == dm_s422 || Format == dm_n422) {
3370 if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3371 return 24;
3372 else if (DecimalBPP >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
3373 return 20;
3374 else if (DecimalBPP >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
3375 return 16;
3376 else
3377 return BPP_INVALID;
3378 } else {
3379 if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
3380 return 36;
3381 else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
3382 return 30;
3383 else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3384 return 24;
3385 else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3386 return 18;
3387 else
3388 return BPP_INVALID;
3389 }
3390 }
3391 }
3392
dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3393 void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3394 {
3395 struct vba_vars_st *locals = &mode_lib->vba;
3396
3397 int i;
3398 unsigned int j, k, m;
3399
3400 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3401
3402 /*Scale Ratio, taps Support Check*/
3403
3404 mode_lib->vba.ScaleRatioAndTapsSupport = true;
3405 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3406 if (mode_lib->vba.ScalerEnabled[k] == false
3407 && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
3408 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
3409 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
3410 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
3411 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)
3412 || mode_lib->vba.HRatio[k] != 1.0
3413 || mode_lib->vba.htaps[k] != 1.0
3414 || mode_lib->vba.VRatio[k] != 1.0
3415 || mode_lib->vba.vtaps[k] != 1.0)) {
3416 mode_lib->vba.ScaleRatioAndTapsSupport = false;
3417 } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0
3418 || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0
3419 || (mode_lib->vba.htaps[k] > 1.0
3420 && (mode_lib->vba.htaps[k] % 2) == 1)
3421 || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio
3422 || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio
3423 || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k]
3424 || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k]
3425 || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
3426 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
3427 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
3428 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
3429 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
3430 && (mode_lib->vba.HRatio[k] / 2.0
3431 > mode_lib->vba.HTAPsChroma[k]
3432 || mode_lib->vba.VRatio[k] / 2.0
3433 > mode_lib->vba.VTAPsChroma[k]))) {
3434 mode_lib->vba.ScaleRatioAndTapsSupport = false;
3435 }
3436 }
3437 /*Source Format, Pixel Format and Scan Support Check*/
3438
3439 mode_lib->vba.SourceFormatPixelAndScanSupport = true;
3440 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3441 if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
3442 && mode_lib->vba.SourceScan[k] != dm_horz)
3443 || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d
3444 || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x
3445 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d
3446 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t
3447 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x
3448 || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d
3449 || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x)
3450 && mode_lib->vba.SourcePixelFormat[k] != dm_444_64)
3451 || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x
3452 && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8
3453 || mode_lib->vba.SourcePixelFormat[k]
3454 == dm_420_8
3455 || mode_lib->vba.SourcePixelFormat[k]
3456 == dm_420_10))
3457 || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
3458 || mode_lib->vba.SurfaceTiling[k]
3459 == dm_sw_gfx7_2d_thin_l_vp)
3460 && !((mode_lib->vba.SourcePixelFormat[k]
3461 == dm_444_64
3462 || mode_lib->vba.SourcePixelFormat[k]
3463 == dm_444_32)
3464 && mode_lib->vba.SourceScan[k]
3465 == dm_horz
3466 && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp
3467 == true
3468 && mode_lib->vba.DCCEnable[k]
3469 == false))
3470 || (mode_lib->vba.DCCEnable[k] == true
3471 && (mode_lib->vba.SurfaceTiling[k]
3472 == dm_sw_linear
3473 || mode_lib->vba.SourcePixelFormat[k]
3474 == dm_420_8
3475 || mode_lib->vba.SourcePixelFormat[k]
3476 == dm_420_10)))) {
3477 mode_lib->vba.SourceFormatPixelAndScanSupport = false;
3478 }
3479 }
3480 /*Bandwidth Support Check*/
3481
3482 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3483 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
3484 locals->BytePerPixelInDETY[k] = 8.0;
3485 locals->BytePerPixelInDETC[k] = 0.0;
3486 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
3487 locals->BytePerPixelInDETY[k] = 4.0;
3488 locals->BytePerPixelInDETC[k] = 0.0;
3489 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16
3490 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
3491 locals->BytePerPixelInDETY[k] = 2.0;
3492 locals->BytePerPixelInDETC[k] = 0.0;
3493 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
3494 locals->BytePerPixelInDETY[k] = 1.0;
3495 locals->BytePerPixelInDETC[k] = 0.0;
3496 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
3497 locals->BytePerPixelInDETY[k] = 1.0;
3498 locals->BytePerPixelInDETC[k] = 2.0;
3499 } else {
3500 locals->BytePerPixelInDETY[k] = 4.0 / 3;
3501 locals->BytePerPixelInDETC[k] = 8.0 / 3;
3502 }
3503 if (mode_lib->vba.SourceScan[k] == dm_horz) {
3504 locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k];
3505 } else {
3506 locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k];
3507 }
3508 }
3509 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3510 locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
3511 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
3512 locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0)
3513 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0;
3514 locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k];
3515 }
3516 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3517 if (mode_lib->vba.WritebackEnable[k] == true
3518 && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
3519 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3520 * mode_lib->vba.WritebackDestinationHeight[k]
3521 / (mode_lib->vba.WritebackSourceHeight[k]
3522 * mode_lib->vba.HTotal[k]
3523 / mode_lib->vba.PixelClock[k]) * 4.0;
3524 } else if (mode_lib->vba.WritebackEnable[k] == true
3525 && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
3526 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3527 * mode_lib->vba.WritebackDestinationHeight[k]
3528 / (mode_lib->vba.WritebackSourceHeight[k]
3529 * mode_lib->vba.HTotal[k]
3530 / mode_lib->vba.PixelClock[k]) * 3.0;
3531 } else if (mode_lib->vba.WritebackEnable[k] == true) {
3532 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3533 * mode_lib->vba.WritebackDestinationHeight[k]
3534 / (mode_lib->vba.WritebackSourceHeight[k]
3535 * mode_lib->vba.HTotal[k]
3536 / mode_lib->vba.PixelClock[k]) * 1.5;
3537 } else {
3538 locals->WriteBandwidth[k] = 0.0;
3539 }
3540 }
3541 mode_lib->vba.DCCEnabledInAnyPlane = false;
3542 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3543 if (mode_lib->vba.DCCEnable[k] == true) {
3544 mode_lib->vba.DCCEnabledInAnyPlane = true;
3545 }
3546 }
3547 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3548 locals->IdealSDPPortBandwidthPerState[i][0] = dml_min3(
3549 mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i],
3550 mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels
3551 * mode_lib->vba.DRAMChannelWidth,
3552 mode_lib->vba.FabricClockPerState[i]
3553 * mode_lib->vba.FabricDatapathToDCNDataReturn);
3554 if (mode_lib->vba.HostVMEnable == false) {
3555 locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
3556 * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0;
3557 } else {
3558 locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
3559 * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0;
3560 }
3561 }
3562 /*Writeback Latency support check*/
3563
3564 mode_lib->vba.WritebackLatencySupport = true;
3565 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3566 if (mode_lib->vba.WritebackEnable[k] == true) {
3567 if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
3568 if (locals->WriteBandwidth[k]
3569 > (mode_lib->vba.WritebackInterfaceLumaBufferSize
3570 + mode_lib->vba.WritebackInterfaceChromaBufferSize)
3571 / mode_lib->vba.WritebackLatency) {
3572 mode_lib->vba.WritebackLatencySupport = false;
3573 }
3574 } else {
3575 if (locals->WriteBandwidth[k]
3576 > 1.5
3577 * dml_min(
3578 mode_lib->vba.WritebackInterfaceLumaBufferSize,
3579 2.0
3580 * mode_lib->vba.WritebackInterfaceChromaBufferSize)
3581 / mode_lib->vba.WritebackLatency) {
3582 mode_lib->vba.WritebackLatencySupport = false;
3583 }
3584 }
3585 }
3586 }
3587 /*Re-ordering Buffer Support Check*/
3588
3589 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3590 locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
3591 (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
3592 + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
3593 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
3594 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly)
3595 * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
3596 if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
3597 > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
3598 locals->ROBSupport[i][0] = true;
3599 } else {
3600 locals->ROBSupport[i][0] = false;
3601 }
3602 }
3603 /*Writeback Mode Support Check*/
3604
3605 mode_lib->vba.TotalNumberOfActiveWriteback = 0;
3606 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3607 if (mode_lib->vba.WritebackEnable[k] == true) {
3608 if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0)
3609 mode_lib->vba.ActiveWritebacksPerPlane[k] = 1;
3610 mode_lib->vba.TotalNumberOfActiveWriteback =
3611 mode_lib->vba.TotalNumberOfActiveWriteback
3612 + mode_lib->vba.ActiveWritebacksPerPlane[k];
3613 }
3614 }
3615 mode_lib->vba.WritebackModeSupport = true;
3616 if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) {
3617 mode_lib->vba.WritebackModeSupport = false;
3618 }
3619 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3620 if (mode_lib->vba.WritebackEnable[k] == true
3621 && mode_lib->vba.Writeback10bpc420Supported != true
3622 && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
3623 mode_lib->vba.WritebackModeSupport = false;
3624 }
3625 }
3626 /*Writeback Scale Ratio and Taps Support Check*/
3627
3628 mode_lib->vba.WritebackScaleRatioAndTapsSupport = true;
3629 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3630 if (mode_lib->vba.WritebackEnable[k] == true) {
3631 if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false
3632 && (mode_lib->vba.WritebackHRatio[k] != 1.0
3633 || mode_lib->vba.WritebackVRatio[k] != 1.0)) {
3634 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3635 }
3636 if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio
3637 || mode_lib->vba.WritebackVRatio[k]
3638 > mode_lib->vba.WritebackMaxVSCLRatio
3639 || mode_lib->vba.WritebackHRatio[k]
3640 < mode_lib->vba.WritebackMinHSCLRatio
3641 || mode_lib->vba.WritebackVRatio[k]
3642 < mode_lib->vba.WritebackMinVSCLRatio
3643 || mode_lib->vba.WritebackLumaHTaps[k]
3644 > mode_lib->vba.WritebackMaxHSCLTaps
3645 || mode_lib->vba.WritebackLumaVTaps[k]
3646 > mode_lib->vba.WritebackMaxVSCLTaps
3647 || mode_lib->vba.WritebackHRatio[k]
3648 > mode_lib->vba.WritebackLumaHTaps[k]
3649 || mode_lib->vba.WritebackVRatio[k]
3650 > mode_lib->vba.WritebackLumaVTaps[k]
3651 || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0
3652 && ((mode_lib->vba.WritebackLumaHTaps[k] % 2)
3653 == 1))
3654 || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32
3655 && (mode_lib->vba.WritebackChromaHTaps[k]
3656 > mode_lib->vba.WritebackMaxHSCLTaps
3657 || mode_lib->vba.WritebackChromaVTaps[k]
3658 > mode_lib->vba.WritebackMaxVSCLTaps
3659 || 2.0
3660 * mode_lib->vba.WritebackHRatio[k]
3661 > mode_lib->vba.WritebackChromaHTaps[k]
3662 || 2.0
3663 * mode_lib->vba.WritebackVRatio[k]
3664 > mode_lib->vba.WritebackChromaVTaps[k]
3665 || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0
3666 && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) {
3667 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3668 }
3669 if (mode_lib->vba.WritebackVRatio[k] < 1.0) {
3670 mode_lib->vba.WritebackLumaVExtra =
3671 dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0);
3672 } else {
3673 mode_lib->vba.WritebackLumaVExtra = -1;
3674 }
3675 if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32
3676 && mode_lib->vba.WritebackLumaVTaps[k]
3677 > (mode_lib->vba.WritebackLineBufferLumaBufferSize
3678 + mode_lib->vba.WritebackLineBufferChromaBufferSize)
3679 / 3.0
3680 / mode_lib->vba.WritebackDestinationWidth[k]
3681 - mode_lib->vba.WritebackLumaVExtra)
3682 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
3683 && mode_lib->vba.WritebackLumaVTaps[k]
3684 > mode_lib->vba.WritebackLineBufferLumaBufferSize
3685 * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
3686 - mode_lib->vba.WritebackLumaVExtra)
3687 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
3688 && mode_lib->vba.WritebackLumaVTaps[k]
3689 > mode_lib->vba.WritebackLineBufferLumaBufferSize
3690 * 8.0 / 10.0
3691 / mode_lib->vba.WritebackDestinationWidth[k]
3692 - mode_lib->vba.WritebackLumaVExtra)) {
3693 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3694 }
3695 if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) {
3696 mode_lib->vba.WritebackChromaVExtra = 0.0;
3697 } else {
3698 mode_lib->vba.WritebackChromaVExtra = -1;
3699 }
3700 if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
3701 && mode_lib->vba.WritebackChromaVTaps[k]
3702 > mode_lib->vba.WritebackLineBufferChromaBufferSize
3703 * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
3704 - mode_lib->vba.WritebackChromaVExtra)
3705 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
3706 && mode_lib->vba.WritebackChromaVTaps[k]
3707 > mode_lib->vba.WritebackLineBufferChromaBufferSize
3708 * 8.0 / 10.0
3709 / mode_lib->vba.WritebackDestinationWidth[k]
3710 - mode_lib->vba.WritebackChromaVExtra)) {
3711 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3712 }
3713 }
3714 }
3715 /*Maximum DISPCLK/DPPCLK Support check*/
3716
3717 mode_lib->vba.WritebackRequiredDISPCLK = 0.0;
3718 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3719 if (mode_lib->vba.WritebackEnable[k] == true) {
3720 mode_lib->vba.WritebackRequiredDISPCLK =
3721 dml_max(
3722 mode_lib->vba.WritebackRequiredDISPCLK,
3723 CalculateWriteBackDISPCLK(
3724 mode_lib->vba.WritebackPixelFormat[k],
3725 mode_lib->vba.PixelClock[k],
3726 mode_lib->vba.WritebackHRatio[k],
3727 mode_lib->vba.WritebackVRatio[k],
3728 mode_lib->vba.WritebackLumaHTaps[k],
3729 mode_lib->vba.WritebackLumaVTaps[k],
3730 mode_lib->vba.WritebackChromaHTaps[k],
3731 mode_lib->vba.WritebackChromaVTaps[k],
3732 mode_lib->vba.WritebackDestinationWidth[k],
3733 mode_lib->vba.HTotal[k],
3734 mode_lib->vba.WritebackChromaLineBufferWidth));
3735 }
3736 }
3737 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3738 if (mode_lib->vba.HRatio[k] > 1.0) {
3739 locals->PSCL_FACTOR[k] = dml_min(
3740 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3741 mode_lib->vba.MaxPSCLToLBThroughput
3742 * mode_lib->vba.HRatio[k]
3743 / dml_ceil(
3744 mode_lib->vba.htaps[k]
3745 / 6.0,
3746 1.0));
3747 } else {
3748 locals->PSCL_FACTOR[k] = dml_min(
3749 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3750 mode_lib->vba.MaxPSCLToLBThroughput);
3751 }
3752 if (locals->BytePerPixelInDETC[k] == 0.0) {
3753 locals->PSCL_FACTOR_CHROMA[k] = 0.0;
3754 locals->MinDPPCLKUsingSingleDPP[k] =
3755 mode_lib->vba.PixelClock[k]
3756 * dml_max3(
3757 mode_lib->vba.vtaps[k] / 6.0
3758 * dml_min(
3759 1.0,
3760 mode_lib->vba.HRatio[k]),
3761 mode_lib->vba.HRatio[k]
3762 * mode_lib->vba.VRatio[k]
3763 / locals->PSCL_FACTOR[k],
3764 1.0);
3765 if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0)
3766 && locals->MinDPPCLKUsingSingleDPP[k]
3767 < 2.0 * mode_lib->vba.PixelClock[k]) {
3768 locals->MinDPPCLKUsingSingleDPP[k] = 2.0
3769 * mode_lib->vba.PixelClock[k];
3770 }
3771 } else {
3772 if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) {
3773 locals->PSCL_FACTOR_CHROMA[k] =
3774 dml_min(
3775 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3776 mode_lib->vba.MaxPSCLToLBThroughput
3777 * mode_lib->vba.HRatio[k]
3778 / 2.0
3779 / dml_ceil(
3780 mode_lib->vba.HTAPsChroma[k]
3781 / 6.0,
3782 1.0));
3783 } else {
3784 locals->PSCL_FACTOR_CHROMA[k] = dml_min(
3785 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3786 mode_lib->vba.MaxPSCLToLBThroughput);
3787 }
3788 locals->MinDPPCLKUsingSingleDPP[k] =
3789 mode_lib->vba.PixelClock[k]
3790 * dml_max5(
3791 mode_lib->vba.vtaps[k] / 6.0
3792 * dml_min(
3793 1.0,
3794 mode_lib->vba.HRatio[k]),
3795 mode_lib->vba.HRatio[k]
3796 * mode_lib->vba.VRatio[k]
3797 / locals->PSCL_FACTOR[k],
3798 mode_lib->vba.VTAPsChroma[k]
3799 / 6.0
3800 * dml_min(
3801 1.0,
3802 mode_lib->vba.HRatio[k]
3803 / 2.0),
3804 mode_lib->vba.HRatio[k]
3805 * mode_lib->vba.VRatio[k]
3806 / 4.0
3807 / locals->PSCL_FACTOR_CHROMA[k],
3808 1.0);
3809 if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0
3810 || mode_lib->vba.HTAPsChroma[k] > 6.0
3811 || mode_lib->vba.VTAPsChroma[k] > 6.0)
3812 && locals->MinDPPCLKUsingSingleDPP[k]
3813 < 2.0 * mode_lib->vba.PixelClock[k]) {
3814 locals->MinDPPCLKUsingSingleDPP[k] = 2.0
3815 * mode_lib->vba.PixelClock[k];
3816 }
3817 }
3818 }
3819 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3820 Calculate256BBlockSizes(
3821 mode_lib->vba.SourcePixelFormat[k],
3822 mode_lib->vba.SurfaceTiling[k],
3823 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
3824 dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
3825 &locals->Read256BlockHeightY[k],
3826 &locals->Read256BlockHeightC[k],
3827 &locals->Read256BlockWidthY[k],
3828 &locals->Read256BlockWidthC[k]);
3829 if (mode_lib->vba.SourceScan[k] == dm_horz) {
3830 locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k];
3831 locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k];
3832 } else {
3833 locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k];
3834 locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k];
3835 }
3836 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
3837 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
3838 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
3839 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16
3840 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) {
3841 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
3842 || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
3843 && (mode_lib->vba.SurfaceTiling[k]
3844 == dm_sw_4kb_s
3845 || mode_lib->vba.SurfaceTiling[k]
3846 == dm_sw_4kb_s_x
3847 || mode_lib->vba.SurfaceTiling[k]
3848 == dm_sw_64kb_s
3849 || mode_lib->vba.SurfaceTiling[k]
3850 == dm_sw_64kb_s_t
3851 || mode_lib->vba.SurfaceTiling[k]
3852 == dm_sw_64kb_s_x
3853 || mode_lib->vba.SurfaceTiling[k]
3854 == dm_sw_var_s
3855 || mode_lib->vba.SurfaceTiling[k]
3856 == dm_sw_var_s_x)
3857 && mode_lib->vba.SourceScan[k] == dm_horz)) {
3858 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3859 } else {
3860 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
3861 / 2.0;
3862 }
3863 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3864 } else {
3865 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
3866 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3867 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3868 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
3869 && mode_lib->vba.SourceScan[k] == dm_horz) {
3870 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
3871 / 2.0;
3872 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3873 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
3874 && mode_lib->vba.SourceScan[k] == dm_horz) {
3875 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]
3876 / 2.0;
3877 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3878 } else {
3879 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3880 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3881 }
3882 }
3883 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
3884 mode_lib->vba.MaximumSwathWidthSupport = 8192.0;
3885 } else {
3886 mode_lib->vba.MaximumSwathWidthSupport = 5120.0;
3887 }
3888 mode_lib->vba.MaximumSwathWidthInDETBuffer =
3889 dml_min(
3890 mode_lib->vba.MaximumSwathWidthSupport,
3891 mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0
3892 / (locals->BytePerPixelInDETY[k]
3893 * locals->MinSwathHeightY[k]
3894 + locals->BytePerPixelInDETC[k]
3895 / 2.0
3896 * locals->MinSwathHeightC[k]));
3897 if (locals->BytePerPixelInDETC[k] == 0.0) {
3898 mode_lib->vba.MaximumSwathWidthInLineBuffer =
3899 mode_lib->vba.LineBufferSize
3900 * dml_max(mode_lib->vba.HRatio[k], 1.0)
3901 / mode_lib->vba.LBBitPerPixel[k]
3902 / (mode_lib->vba.vtaps[k]
3903 + dml_max(
3904 dml_ceil(
3905 mode_lib->vba.VRatio[k],
3906 1.0)
3907 - 2,
3908 0.0));
3909 } else {
3910 mode_lib->vba.MaximumSwathWidthInLineBuffer =
3911 dml_min(
3912 mode_lib->vba.LineBufferSize
3913 * dml_max(
3914 mode_lib->vba.HRatio[k],
3915 1.0)
3916 / mode_lib->vba.LBBitPerPixel[k]
3917 / (mode_lib->vba.vtaps[k]
3918 + dml_max(
3919 dml_ceil(
3920 mode_lib->vba.VRatio[k],
3921 1.0)
3922 - 2,
3923 0.0)),
3924 2.0 * mode_lib->vba.LineBufferSize
3925 * dml_max(
3926 mode_lib->vba.HRatio[k]
3927 / 2.0,
3928 1.0)
3929 / mode_lib->vba.LBBitPerPixel[k]
3930 / (mode_lib->vba.VTAPsChroma[k]
3931 + dml_max(
3932 dml_ceil(
3933 mode_lib->vba.VRatio[k]
3934 / 2.0,
3935 1.0)
3936 - 2,
3937 0.0)));
3938 }
3939 locals->MaximumSwathWidth[k] = dml_min(
3940 mode_lib->vba.MaximumSwathWidthInDETBuffer,
3941 mode_lib->vba.MaximumSwathWidthInLineBuffer);
3942 }
3943 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3944 double MaxMaxDispclkRoundedDown = RoundToDFSGranularityDown(
3945 mode_lib->vba.MaxDispclk[mode_lib->vba.soc.num_states],
3946 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
3947
3948 for (j = 0; j < 2; j++) {
3949 mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
3950 mode_lib->vba.MaxDispclk[i],
3951 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
3952 mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
3953 mode_lib->vba.MaxDppclk[i],
3954 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
3955 locals->RequiredDISPCLK[i][j] = 0.0;
3956 locals->DISPCLK_DPPCLK_Support[i][j] = true;
3957 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3958 mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine =
3959 mode_lib->vba.PixelClock[k]
3960 * (1.0
3961 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
3962 / 100.0)
3963 * (1.0
3964 + mode_lib->vba.DISPCLKRampingMargin
3965 / 100.0);
3966 if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i]
3967 && i == mode_lib->vba.soc.num_states)
3968 mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k]
3969 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3970
3971 mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
3972 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
3973 if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i]
3974 && i == mode_lib->vba.soc.num_states)
3975 mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
3976 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3977
3978 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3979 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
3980 if (mode_lib->vba.ODMCapability) {
3981 if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) {
3982 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3983 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
3984 } else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN21_MAX_DSC_IMAGE_WIDTH)) {
3985 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3986 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
3987 } else if (locals->HActive[k] > DCN21_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
3988 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3989 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
3990 }
3991 }
3992
3993 if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
3994 && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
3995 && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
3996 locals->NoOfDPP[i][j][k] = 1;
3997 locals->RequiredDPPCLK[i][j][k] =
3998 locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3999 } else {
4000 locals->NoOfDPP[i][j][k] = 2;
4001 locals->RequiredDPPCLK[i][j][k] =
4002 locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4003 }
4004 locals->RequiredDISPCLK[i][j] = dml_max(
4005 locals->RequiredDISPCLK[i][j],
4006 mode_lib->vba.PlaneRequiredDISPCLK);
4007 if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4008 > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity)
4009 || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) {
4010 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4011 }
4012 }
4013 locals->TotalNumberOfActiveDPP[i][j] = 0.0;
4014 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4015 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4016 if (j == 1) {
4017 while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP
4018 && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) {
4019 double BWOfNonSplitPlaneOfMaximumBandwidth;
4020 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4021
4022 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4023 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4024 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4025 if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) {
4026 BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k];
4027 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4028 }
4029 }
4030 locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4031 locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4032 locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4033 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4034 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1;
4035 }
4036 }
4037 if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) {
4038 locals->RequiredDISPCLK[i][j] = 0.0;
4039 locals->DISPCLK_DPPCLK_Support[i][j] = true;
4040 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4041 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4042 if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
4043 locals->NoOfDPP[i][j][k] = 1;
4044 locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
4045 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4046 } else {
4047 locals->NoOfDPP[i][j][k] = 2;
4048 locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
4049 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4050 }
4051 if (i != mode_lib->vba.soc.num_states) {
4052 mode_lib->vba.PlaneRequiredDISPCLK =
4053 mode_lib->vba.PixelClock[k]
4054 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4055 * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
4056 } else {
4057 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k]
4058 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4059 }
4060 locals->RequiredDISPCLK[i][j] = dml_max(
4061 locals->RequiredDISPCLK[i][j],
4062 mode_lib->vba.PlaneRequiredDISPCLK);
4063 if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4064 > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
4065 || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)
4066 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4067 }
4068 locals->TotalNumberOfActiveDPP[i][j] = 0.0;
4069 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4070 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4071 }
4072 locals->RequiredDISPCLK[i][j] = dml_max(
4073 locals->RequiredDISPCLK[i][j],
4074 mode_lib->vba.WritebackRequiredDISPCLK);
4075 if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity
4076 < mode_lib->vba.WritebackRequiredDISPCLK) {
4077 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4078 }
4079 }
4080 }
4081 /*Viewport Size Check*/
4082
4083 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4084 locals->ViewportSizeSupport[i][0] = true;
4085 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4086 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4087 if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
4088 > locals->MaximumSwathWidth[k]) {
4089 locals->ViewportSizeSupport[i][0] = false;
4090 }
4091 } else {
4092 if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
4093 locals->ViewportSizeSupport[i][0] = false;
4094 }
4095 }
4096 }
4097 }
4098 /*Total Available Pipes Support Check*/
4099
4100 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4101 for (j = 0; j < 2; j++) {
4102 if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP)
4103 locals->TotalAvailablePipesSupport[i][j] = true;
4104 else
4105 locals->TotalAvailablePipesSupport[i][j] = false;
4106 }
4107 }
4108 /*Total Available OTG Support Check*/
4109
4110 mode_lib->vba.TotalNumberOfActiveOTG = 0.0;
4111 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4112 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4113 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG
4114 + 1.0;
4115 }
4116 }
4117 if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) {
4118 mode_lib->vba.NumberOfOTGSupport = true;
4119 } else {
4120 mode_lib->vba.NumberOfOTGSupport = false;
4121 }
4122 /*Display IO and DSC Support Check*/
4123
4124 mode_lib->vba.NonsupportedDSCInputBPC = false;
4125 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4126 if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0
4127 || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0
4128 || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) {
4129 mode_lib->vba.NonsupportedDSCInputBPC = true;
4130 }
4131 }
4132 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4133 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4134 locals->RequiresDSC[i][k] = false;
4135 locals->RequiresFEC[i][k] = 0;
4136 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4137 if (mode_lib->vba.Output[k] == dm_hdmi) {
4138 locals->RequiresDSC[i][k] = false;
4139 locals->RequiresFEC[i][k] = 0;
4140 locals->OutputBppPerState[i][k] = TruncToValidBPP(
4141 dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24,
4142 mode_lib->vba.ForcedOutputLinkBPP[k],
4143 false,
4144 mode_lib->vba.Output[k],
4145 mode_lib->vba.OutputFormat[k],
4146 mode_lib->vba.DSCInputBitPerComponent[k]);
4147 } else if (mode_lib->vba.Output[k] == dm_dp
4148 || mode_lib->vba.Output[k] == dm_edp) {
4149 if (mode_lib->vba.Output[k] == dm_edp) {
4150 mode_lib->vba.EffectiveFECOverhead = 0.0;
4151 } else {
4152 mode_lib->vba.EffectiveFECOverhead =
4153 mode_lib->vba.FECOverhead;
4154 }
4155 if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) {
4156 mode_lib->vba.Outbpp = TruncToValidBPP(
4157 (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0
4158 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4159 mode_lib->vba.ForcedOutputLinkBPP[k],
4160 false,
4161 mode_lib->vba.Output[k],
4162 mode_lib->vba.OutputFormat[k],
4163 mode_lib->vba.DSCInputBitPerComponent[k]);
4164 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4165 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0
4166 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4167 mode_lib->vba.ForcedOutputLinkBPP[k],
4168 true,
4169 mode_lib->vba.Output[k],
4170 mode_lib->vba.OutputFormat[k],
4171 mode_lib->vba.DSCInputBitPerComponent[k]);
4172 if (mode_lib->vba.DSCEnabled[k] == true) {
4173 locals->RequiresDSC[i][k] = true;
4174 if (mode_lib->vba.Output[k] == dm_dp) {
4175 locals->RequiresFEC[i][k] = true;
4176 } else {
4177 locals->RequiresFEC[i][k] = false;
4178 }
4179 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4180 } else {
4181 locals->RequiresDSC[i][k] = false;
4182 locals->RequiresFEC[i][k] = false;
4183 }
4184 locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
4185 }
4186 if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) {
4187 mode_lib->vba.Outbpp = TruncToValidBPP(
4188 (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0
4189 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4190 mode_lib->vba.ForcedOutputLinkBPP[k],
4191 false,
4192 mode_lib->vba.Output[k],
4193 mode_lib->vba.OutputFormat[k],
4194 mode_lib->vba.DSCInputBitPerComponent[k]);
4195 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4196 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0
4197 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4198 mode_lib->vba.ForcedOutputLinkBPP[k],
4199 true,
4200 mode_lib->vba.Output[k],
4201 mode_lib->vba.OutputFormat[k],
4202 mode_lib->vba.DSCInputBitPerComponent[k]);
4203 if (mode_lib->vba.DSCEnabled[k] == true) {
4204 locals->RequiresDSC[i][k] = true;
4205 if (mode_lib->vba.Output[k] == dm_dp) {
4206 locals->RequiresFEC[i][k] = true;
4207 } else {
4208 locals->RequiresFEC[i][k] = false;
4209 }
4210 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4211 } else {
4212 locals->RequiresDSC[i][k] = false;
4213 locals->RequiresFEC[i][k] = false;
4214 }
4215 locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
4216 }
4217 if (mode_lib->vba.Outbpp == BPP_INVALID
4218 && mode_lib->vba.PHYCLKPerState[i]
4219 >= 810.0) {
4220 mode_lib->vba.Outbpp = TruncToValidBPP(
4221 (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0
4222 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4223 mode_lib->vba.ForcedOutputLinkBPP[k],
4224 false,
4225 mode_lib->vba.Output[k],
4226 mode_lib->vba.OutputFormat[k],
4227 mode_lib->vba.DSCInputBitPerComponent[k]);
4228 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4229 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0
4230 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4231 mode_lib->vba.ForcedOutputLinkBPP[k],
4232 true,
4233 mode_lib->vba.Output[k],
4234 mode_lib->vba.OutputFormat[k],
4235 mode_lib->vba.DSCInputBitPerComponent[k]);
4236 if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) {
4237 locals->RequiresDSC[i][k] = true;
4238 if (mode_lib->vba.Output[k] == dm_dp) {
4239 locals->RequiresFEC[i][k] = true;
4240 } else {
4241 locals->RequiresFEC[i][k] = false;
4242 }
4243 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4244 } else {
4245 locals->RequiresDSC[i][k] = false;
4246 locals->RequiresFEC[i][k] = false;
4247 }
4248 locals->OutputBppPerState[i][k] =
4249 mode_lib->vba.Outbpp;
4250 }
4251 }
4252 } else {
4253 locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE;
4254 }
4255 }
4256 }
4257 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4258 locals->DIOSupport[i] = true;
4259 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4260 if (locals->OutputBppPerState[i][k] == BPP_INVALID
4261 || (mode_lib->vba.OutputFormat[k] == dm_420
4262 && mode_lib->vba.Interlace[k] == true
4263 && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)) {
4264 locals->DIOSupport[i] = false;
4265 }
4266 }
4267 }
4268 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4269 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4270 locals->DSCCLKRequiredMoreThanSupported[i] = false;
4271 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4272 if ((mode_lib->vba.Output[k] == dm_dp
4273 || mode_lib->vba.Output[k] == dm_edp)) {
4274 if (mode_lib->vba.OutputFormat[k] == dm_420
4275 || mode_lib->vba.OutputFormat[k]
4276 == dm_n422) {
4277 mode_lib->vba.DSCFormatFactor = 2;
4278 } else {
4279 mode_lib->vba.DSCFormatFactor = 1;
4280 }
4281 if (locals->RequiresDSC[i][k] == true) {
4282 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4283 if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
4284 > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
4285 locals->DSCCLKRequiredMoreThanSupported[i] =
4286 true;
4287 }
4288 } else {
4289 if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor
4290 > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
4291 locals->DSCCLKRequiredMoreThanSupported[i] =
4292 true;
4293 }
4294 }
4295 }
4296 }
4297 }
4298 }
4299 }
4300 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4301 locals->NotEnoughDSCUnits[i] = false;
4302 mode_lib->vba.TotalDSCUnitsRequired = 0.0;
4303 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4304 if (locals->RequiresDSC[i][k] == true) {
4305 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4306 mode_lib->vba.TotalDSCUnitsRequired =
4307 mode_lib->vba.TotalDSCUnitsRequired + 2.0;
4308 } else {
4309 mode_lib->vba.TotalDSCUnitsRequired =
4310 mode_lib->vba.TotalDSCUnitsRequired + 1.0;
4311 }
4312 }
4313 }
4314 if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) {
4315 locals->NotEnoughDSCUnits[i] = true;
4316 }
4317 }
4318 /*DSC Delay per state*/
4319
4320 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4321 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4322 if (mode_lib->vba.BlendingAndTiming[k] != k) {
4323 mode_lib->vba.slices = 0;
4324 } else if (locals->RequiresDSC[i][k] == 0
4325 || locals->RequiresDSC[i][k] == false) {
4326 mode_lib->vba.slices = 0;
4327 } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) {
4328 mode_lib->vba.slices = dml_ceil(
4329 mode_lib->vba.PixelClockBackEnd[k] / 400.0,
4330 4.0);
4331 } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) {
4332 mode_lib->vba.slices = 8.0;
4333 } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) {
4334 mode_lib->vba.slices = 4.0;
4335 } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) {
4336 mode_lib->vba.slices = 2.0;
4337 } else {
4338 mode_lib->vba.slices = 1.0;
4339 }
4340 if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE
4341 || locals->OutputBppPerState[i][k] == BPP_INVALID) {
4342 mode_lib->vba.bpp = 0.0;
4343 } else {
4344 mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
4345 }
4346 if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
4347 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4348 locals->DSCDelayPerState[i][k] =
4349 dscceComputeDelay(
4350 mode_lib->vba.DSCInputBitPerComponent[k],
4351 mode_lib->vba.bpp,
4352 dml_ceil(
4353 mode_lib->vba.HActive[k]
4354 / mode_lib->vba.slices,
4355 1.0),
4356 mode_lib->vba.slices,
4357 mode_lib->vba.OutputFormat[k])
4358 + dscComputeDelay(
4359 mode_lib->vba.OutputFormat[k]);
4360 } else {
4361 locals->DSCDelayPerState[i][k] =
4362 2.0 * (dscceComputeDelay(
4363 mode_lib->vba.DSCInputBitPerComponent[k],
4364 mode_lib->vba.bpp,
4365 dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0),
4366 mode_lib->vba.slices / 2,
4367 mode_lib->vba.OutputFormat[k])
4368 + dscComputeDelay(mode_lib->vba.OutputFormat[k]));
4369 }
4370 locals->DSCDelayPerState[i][k] =
4371 locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k];
4372 } else {
4373 locals->DSCDelayPerState[i][k] = 0.0;
4374 }
4375 }
4376 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4377 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4378 for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) {
4379 if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true)
4380 locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m];
4381 }
4382 }
4383 }
4384 }
4385
4386 //Prefetch Check
4387 for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
4388 for (j = 0; j <= 1; ++j) {
4389 locals->TotalNumberOfDCCActiveDPP[i][j] = 0;
4390 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4391 if (mode_lib->vba.DCCEnable[k] == true)
4392 locals->TotalNumberOfDCCActiveDPP[i][j] = locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4393 }
4394 }
4395 }
4396
4397 mode_lib->vba.UrgentLatency = dml_max3(
4398 mode_lib->vba.UrgentLatencyPixelDataOnly,
4399 mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
4400 mode_lib->vba.UrgentLatencyVMDataOnly);
4401 mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(
4402 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
4403 &mode_lib->vba.MinPrefetchMode,
4404 &mode_lib->vba.MaxPrefetchMode);
4405
4406 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4407 for (j = 0; j < 2; j++) {
4408 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4409 locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k];
4410 locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k];
4411 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4412 locals->SwathWidthYThisState[k] =
4413 dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]));
4414 } else {
4415 locals->SwathWidthYThisState[k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
4416 }
4417 mode_lib->vba.SwathWidthGranularityY = 256.0
4418 / dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
4419 / locals->MaxSwathHeightY[k];
4420 mode_lib->vba.RoundedUpMaxSwathSizeBytesY =
4421 (dml_ceil(locals->SwathWidthYThisState[k] - 1.0, mode_lib->vba.SwathWidthGranularityY)
4422 + mode_lib->vba.SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k];
4423 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
4424 mode_lib->vba.RoundedUpMaxSwathSizeBytesY = dml_ceil(
4425 mode_lib->vba.RoundedUpMaxSwathSizeBytesY,
4426 256.0) + 256;
4427 }
4428 if (locals->MaxSwathHeightC[k] > 0.0) {
4429 mode_lib->vba.SwathWidthGranularityC = 256.0 / dml_ceil(locals->BytePerPixelInDETC[k], 2.0) / locals->MaxSwathHeightC[k];
4430 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYThisState[k] / 2.0 - 1.0, mode_lib->vba.SwathWidthGranularityC)
4431 + mode_lib->vba.SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k];
4432 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
4433 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = dml_ceil(mode_lib->vba.RoundedUpMaxSwathSizeBytesC, 256.0) + 256;
4434 }
4435 } else {
4436 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0;
4437 }
4438 if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY + mode_lib->vba.RoundedUpMaxSwathSizeBytesC
4439 <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
4440 locals->SwathHeightYThisState[k] = locals->MaxSwathHeightY[k];
4441 locals->SwathHeightCThisState[k] = locals->MaxSwathHeightC[k];
4442 } else {
4443 locals->SwathHeightYThisState[k] =
4444 locals->MinSwathHeightY[k];
4445 locals->SwathHeightCThisState[k] =
4446 locals->MinSwathHeightC[k];
4447 }
4448 }
4449
4450 CalculateDCFCLKDeepSleep(
4451 mode_lib,
4452 mode_lib->vba.NumberOfActivePlanes,
4453 locals->BytePerPixelInDETY,
4454 locals->BytePerPixelInDETC,
4455 mode_lib->vba.VRatio,
4456 locals->SwathWidthYThisState,
4457 locals->NoOfDPPThisState,
4458 mode_lib->vba.HRatio,
4459 mode_lib->vba.PixelClock,
4460 locals->PSCL_FACTOR,
4461 locals->PSCL_FACTOR_CHROMA,
4462 locals->RequiredDPPCLKThisState,
4463 &mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]);
4464
4465 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4466 if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
4467 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
4468 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
4469 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
4470 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) {
4471 mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4472 mode_lib,
4473 mode_lib->vba.DCCEnable[k],
4474 locals->Read256BlockHeightC[k],
4475 locals->Read256BlockWidthC[k],
4476 mode_lib->vba.SourcePixelFormat[k],
4477 mode_lib->vba.SurfaceTiling[k],
4478 dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
4479 mode_lib->vba.SourceScan[k],
4480 mode_lib->vba.ViewportWidth[k] / 2.0,
4481 mode_lib->vba.ViewportHeight[k] / 2.0,
4482 locals->SwathWidthYThisState[k] / 2.0,
4483 mode_lib->vba.GPUVMEnable,
4484 mode_lib->vba.HostVMEnable,
4485 mode_lib->vba.HostVMMaxPageTableLevels,
4486 mode_lib->vba.HostVMCachedPageTableLevels,
4487 mode_lib->vba.VMMPageSize,
4488 mode_lib->vba.PTEBufferSizeInRequestsChroma,
4489 mode_lib->vba.PitchC[k],
4490 0.0,
4491 &locals->MacroTileWidthC[k],
4492 &mode_lib->vba.MetaRowBytesC,
4493 &mode_lib->vba.DPTEBytesPerRowC,
4494 &locals->PTEBufferSizeNotExceededC[i][j][k],
4495 locals->dpte_row_width_chroma_ub,
4496 &locals->dpte_row_height_chroma[k],
4497 &locals->meta_req_width_chroma[k],
4498 &locals->meta_req_height_chroma[k],
4499 &locals->meta_row_width_chroma[k],
4500 &locals->meta_row_height_chroma[k],
4501 &locals->vm_group_bytes_chroma,
4502 &locals->dpte_group_bytes_chroma,
4503 locals->PixelPTEReqWidthC,
4504 locals->PixelPTEReqHeightC,
4505 locals->PTERequestSizeC,
4506 locals->dpde0_bytes_per_frame_ub_c,
4507 locals->meta_pte_bytes_per_frame_ub_c);
4508 locals->PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
4509 mode_lib,
4510 mode_lib->vba.VRatio[k]/2,
4511 mode_lib->vba.VTAPsChroma[k],
4512 mode_lib->vba.Interlace[k],
4513 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4514 locals->SwathHeightCThisState[k],
4515 mode_lib->vba.ViewportYStartC[k],
4516 &locals->PrefillC[k],
4517 &locals->MaxNumSwC[k]);
4518 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma;
4519 } else {
4520 mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
4521 mode_lib->vba.MetaRowBytesC = 0.0;
4522 mode_lib->vba.DPTEBytesPerRowC = 0.0;
4523 locals->PrefetchLinesC[0][0][k] = 0.0;
4524 locals->PTEBufferSizeNotExceededC[i][j][k] = true;
4525 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
4526 }
4527 mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4528 mode_lib,
4529 mode_lib->vba.DCCEnable[k],
4530 locals->Read256BlockHeightY[k],
4531 locals->Read256BlockWidthY[k],
4532 mode_lib->vba.SourcePixelFormat[k],
4533 mode_lib->vba.SurfaceTiling[k],
4534 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
4535 mode_lib->vba.SourceScan[k],
4536 mode_lib->vba.ViewportWidth[k],
4537 mode_lib->vba.ViewportHeight[k],
4538 locals->SwathWidthYThisState[k],
4539 mode_lib->vba.GPUVMEnable,
4540 mode_lib->vba.HostVMEnable,
4541 mode_lib->vba.HostVMMaxPageTableLevels,
4542 mode_lib->vba.HostVMCachedPageTableLevels,
4543 mode_lib->vba.VMMPageSize,
4544 locals->PTEBufferSizeInRequestsForLuma,
4545 mode_lib->vba.PitchY[k],
4546 mode_lib->vba.DCCMetaPitchY[k],
4547 &locals->MacroTileWidthY[k],
4548 &mode_lib->vba.MetaRowBytesY,
4549 &mode_lib->vba.DPTEBytesPerRowY,
4550 &locals->PTEBufferSizeNotExceededY[i][j][k],
4551 locals->dpte_row_width_luma_ub,
4552 &locals->dpte_row_height[k],
4553 &locals->meta_req_width[k],
4554 &locals->meta_req_height[k],
4555 &locals->meta_row_width[k],
4556 &locals->meta_row_height[k],
4557 &locals->vm_group_bytes[k],
4558 &locals->dpte_group_bytes[k],
4559 locals->PixelPTEReqWidthY,
4560 locals->PixelPTEReqHeightY,
4561 locals->PTERequestSizeY,
4562 locals->dpde0_bytes_per_frame_ub_l,
4563 locals->meta_pte_bytes_per_frame_ub_l);
4564 locals->PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
4565 mode_lib,
4566 mode_lib->vba.VRatio[k],
4567 mode_lib->vba.vtaps[k],
4568 mode_lib->vba.Interlace[k],
4569 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4570 locals->SwathHeightYThisState[k],
4571 mode_lib->vba.ViewportYStartY[k],
4572 &locals->PrefillY[k],
4573 &locals->MaxNumSwY[k]);
4574 locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
4575 mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
4576 locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
4577 locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
4578
4579 CalculateActiveRowBandwidth(
4580 mode_lib->vba.GPUVMEnable,
4581 mode_lib->vba.SourcePixelFormat[k],
4582 mode_lib->vba.VRatio[k],
4583 mode_lib->vba.DCCEnable[k],
4584 mode_lib->vba.HTotal[k] /
4585 mode_lib->vba.PixelClock[k],
4586 mode_lib->vba.MetaRowBytesY,
4587 mode_lib->vba.MetaRowBytesC,
4588 locals->meta_row_height[k],
4589 locals->meta_row_height_chroma[k],
4590 mode_lib->vba.DPTEBytesPerRowY,
4591 mode_lib->vba.DPTEBytesPerRowC,
4592 locals->dpte_row_height[k],
4593 locals->dpte_row_height_chroma[k],
4594 &locals->meta_row_bw[k],
4595 &locals->dpte_row_bw[k]);
4596 }
4597 mode_lib->vba.ExtraLatency = CalculateExtraLatency(
4598 locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i],
4599 locals->TotalNumberOfActiveDPP[i][j],
4600 mode_lib->vba.PixelChunkSizeInKByte,
4601 locals->TotalNumberOfDCCActiveDPP[i][j],
4602 mode_lib->vba.MetaChunkSize,
4603 locals->ReturnBWPerState[i][0],
4604 mode_lib->vba.GPUVMEnable,
4605 mode_lib->vba.HostVMEnable,
4606 mode_lib->vba.NumberOfActivePlanes,
4607 locals->NoOfDPPThisState,
4608 locals->dpte_group_bytes,
4609 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4610 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4611 mode_lib->vba.HostVMMaxPageTableLevels,
4612 mode_lib->vba.HostVMCachedPageTableLevels);
4613
4614 mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
4615 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4616 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4617 if (mode_lib->vba.WritebackEnable[k] == true) {
4618 locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency
4619 + CalculateWriteBackDelay(
4620 mode_lib->vba.WritebackPixelFormat[k],
4621 mode_lib->vba.WritebackHRatio[k],
4622 mode_lib->vba.WritebackVRatio[k],
4623 mode_lib->vba.WritebackLumaHTaps[k],
4624 mode_lib->vba.WritebackLumaVTaps[k],
4625 mode_lib->vba.WritebackChromaHTaps[k],
4626 mode_lib->vba.WritebackChromaVTaps[k],
4627 mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j];
4628 } else {
4629 locals->WritebackDelay[i][k] = 0.0;
4630 }
4631 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4632 if (mode_lib->vba.BlendingAndTiming[m] == k
4633 && mode_lib->vba.WritebackEnable[m]
4634 == true) {
4635 locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k],
4636 mode_lib->vba.WritebackLatency + CalculateWriteBackDelay(
4637 mode_lib->vba.WritebackPixelFormat[m],
4638 mode_lib->vba.WritebackHRatio[m],
4639 mode_lib->vba.WritebackVRatio[m],
4640 mode_lib->vba.WritebackLumaHTaps[m],
4641 mode_lib->vba.WritebackLumaVTaps[m],
4642 mode_lib->vba.WritebackChromaHTaps[m],
4643 mode_lib->vba.WritebackChromaVTaps[m],
4644 mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]);
4645 }
4646 }
4647 }
4648 }
4649 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4650 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4651 if (mode_lib->vba.BlendingAndTiming[k] == m) {
4652 locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m];
4653 }
4654 }
4655 }
4656 mode_lib->vba.MaxMaxVStartup[0][0] = 0;
4657 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4658 locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
4659 - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
4660 mode_lib->vba.MaxMaxVStartup[0][0] = dml_max(mode_lib->vba.MaxMaxVStartup[0][0], locals->MaximumVStartup[0][0][k]);
4661 }
4662
4663 mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode;
4664 mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
4665 do {
4666 mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode;
4667 mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
4668
4669 mode_lib->vba.TWait = CalculateTWait(
4670 mode_lib->vba.PrefetchMode[i][j],
4671 mode_lib->vba.DRAMClockChangeLatency,
4672 mode_lib->vba.UrgentLatency,
4673 mode_lib->vba.SREnterPlusExitTime);
4674 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4675 Pipe myPipe;
4676 HostVM myHostVM;
4677
4678 if (mode_lib->vba.XFCEnabled[k] == true) {
4679 mode_lib->vba.XFCRemoteSurfaceFlipDelay =
4680 CalculateRemoteSurfaceFlipDelay(
4681 mode_lib,
4682 mode_lib->vba.VRatio[k],
4683 locals->SwathWidthYThisState[k],
4684 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
4685 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4686 mode_lib->vba.XFCTSlvVupdateOffset,
4687 mode_lib->vba.XFCTSlvVupdateWidth,
4688 mode_lib->vba.XFCTSlvVreadyOffset,
4689 mode_lib->vba.XFCXBUFLatencyTolerance,
4690 mode_lib->vba.XFCFillBWOverhead,
4691 mode_lib->vba.XFCSlvChunkSize,
4692 mode_lib->vba.XFCBusTransportTime,
4693 mode_lib->vba.TimeCalc,
4694 mode_lib->vba.TWait,
4695 &mode_lib->vba.SrcActiveDrainRate,
4696 &mode_lib->vba.TInitXFill,
4697 &mode_lib->vba.TslvChk);
4698 } else {
4699 mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
4700 }
4701
4702 myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
4703 myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
4704 myPipe.PixelClock = mode_lib->vba.PixelClock[k];
4705 myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
4706 myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
4707 myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
4708 myPipe.SourceScan = mode_lib->vba.SourceScan[k];
4709 myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
4710 myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
4711 myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
4712 myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
4713 myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
4714 myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
4715 myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
4716 myPipe.HTotal = mode_lib->vba.HTotal[k];
4717
4718
4719 myHostVM.Enable = mode_lib->vba.HostVMEnable;
4720 myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
4721 myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
4722
4723
4724 mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
4725 mode_lib,
4726 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4727 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4728 &myPipe,
4729 locals->DSCDelayPerState[i][k],
4730 mode_lib->vba.DPPCLKDelaySubtotal,
4731 mode_lib->vba.DPPCLKDelaySCL,
4732 mode_lib->vba.DPPCLKDelaySCLLBOnly,
4733 mode_lib->vba.DPPCLKDelayCNVCFormater,
4734 mode_lib->vba.DPPCLKDelayCNVCCursor,
4735 mode_lib->vba.DISPCLKDelaySubtotal,
4736 locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
4737 mode_lib->vba.OutputFormat[k],
4738 mode_lib->vba.MaxInterDCNTileRepeaters,
4739 dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
4740 locals->MaximumVStartup[0][0][k],
4741 mode_lib->vba.GPUVMMaxPageTableLevels,
4742 mode_lib->vba.GPUVMEnable,
4743 &myHostVM,
4744 mode_lib->vba.DynamicMetadataEnable[k],
4745 mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
4746 mode_lib->vba.DynamicMetadataTransmittedBytes[k],
4747 mode_lib->vba.DCCEnable[k],
4748 mode_lib->vba.UrgentLatency,
4749 mode_lib->vba.ExtraLatency,
4750 mode_lib->vba.TimeCalc,
4751 locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
4752 locals->MetaRowBytes[0][0][k],
4753 locals->DPTEBytesPerRow[0][0][k],
4754 locals->PrefetchLinesY[0][0][k],
4755 locals->SwathWidthYThisState[k],
4756 locals->BytePerPixelInDETY[k],
4757 locals->PrefillY[k],
4758 locals->MaxNumSwY[k],
4759 locals->PrefetchLinesC[0][0][k],
4760 locals->BytePerPixelInDETC[k],
4761 locals->PrefillC[k],
4762 locals->MaxNumSwC[k],
4763 locals->SwathHeightYThisState[k],
4764 locals->SwathHeightCThisState[k],
4765 mode_lib->vba.TWait,
4766 mode_lib->vba.XFCEnabled[k],
4767 mode_lib->vba.XFCRemoteSurfaceFlipDelay,
4768 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4769 &locals->dst_x_after_scaler,
4770 &locals->dst_y_after_scaler,
4771 &locals->LineTimesForPrefetch[k],
4772 &locals->PrefetchBW[k],
4773 &locals->LinesForMetaPTE[k],
4774 &locals->LinesForMetaAndDPTERow[k],
4775 &locals->VRatioPreY[i][j][k],
4776 &locals->VRatioPreC[i][j][k],
4777 &locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
4778 &locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
4779 &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
4780 &locals->Tno_bw[k],
4781 &locals->prefetch_vmrow_bw[k],
4782 locals->swath_width_luma_ub,
4783 locals->swath_width_chroma_ub,
4784 &mode_lib->vba.VUpdateOffsetPix[k],
4785 &mode_lib->vba.VUpdateWidthPix[k],
4786 &mode_lib->vba.VReadyOffsetPix[k]);
4787 }
4788 mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
4789 mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
4790 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4791 unsigned int m;
4792
4793 locals->cursor_bw[k] = 0;
4794 locals->cursor_bw_pre[k] = 0;
4795 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
4796 locals->cursor_bw[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
4797 / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
4798 locals->cursor_bw_pre[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
4799 / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPreY[i][j][k];
4800 }
4801
4802 CalculateUrgentBurstFactor(
4803 mode_lib->vba.DETBufferSizeInKByte,
4804 locals->SwathHeightYThisState[k],
4805 locals->SwathHeightCThisState[k],
4806 locals->SwathWidthYThisState[k],
4807 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4808 mode_lib->vba.UrgentLatency,
4809 mode_lib->vba.CursorBufferSize,
4810 mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
4811 dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
4812 mode_lib->vba.VRatio[k],
4813 locals->VRatioPreY[i][j][k],
4814 locals->VRatioPreC[i][j][k],
4815 locals->BytePerPixelInDETY[k],
4816 locals->BytePerPixelInDETC[k],
4817 &locals->UrgentBurstFactorCursor[k],
4818 &locals->UrgentBurstFactorCursorPre[k],
4819 &locals->UrgentBurstFactorLuma[k],
4820 &locals->UrgentBurstFactorLumaPre[k],
4821 &locals->UrgentBurstFactorChroma[k],
4822 &locals->UrgentBurstFactorChromaPre[k],
4823 &locals->NotEnoughUrgentLatencyHiding,
4824 &locals->NotEnoughUrgentLatencyHidingPre);
4825
4826 if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
4827 locals->UrgentBurstFactorCursor[k] = 1;
4828 locals->UrgentBurstFactorCursorPre[k] = 1;
4829 locals->UrgentBurstFactorLuma[k] = 1;
4830 locals->UrgentBurstFactorLumaPre[k] = 1;
4831 locals->UrgentBurstFactorChroma[k] = 1;
4832 locals->UrgentBurstFactorChromaPre[k] = 1;
4833 }
4834
4835 mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithoutPrefetch
4836 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] + locals->ReadBandwidthLuma[k]
4837 * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
4838 * locals->UrgentBurstFactorChroma[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k];
4839 mode_lib->vba.MaximumReadBandwidthWithPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch
4840 + dml_max3(locals->prefetch_vmrow_bw[k],
4841 locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
4842 * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k]
4843 + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
4844 locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
4845 + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
4846 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
4847 }
4848 locals->BandwidthWithoutPrefetchSupported[i][0] = true;
4849 if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]
4850 || locals->NotEnoughUrgentLatencyHiding == 1) {
4851 locals->BandwidthWithoutPrefetchSupported[i][0] = false;
4852 }
4853
4854 locals->PrefetchSupported[i][j] = true;
4855 if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]
4856 || locals->NotEnoughUrgentLatencyHiding == 1
4857 || locals->NotEnoughUrgentLatencyHidingPre == 1) {
4858 locals->PrefetchSupported[i][j] = false;
4859 }
4860 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4861 if (locals->LineTimesForPrefetch[k] < 2.0
4862 || locals->LinesForMetaPTE[k] >= 32.0
4863 || locals->LinesForMetaAndDPTERow[k] >= 16.0
4864 || mode_lib->vba.IsErrorResult[i][j][k] == true) {
4865 locals->PrefetchSupported[i][j] = false;
4866 }
4867 }
4868 locals->VRatioInPrefetchSupported[i][j] = true;
4869 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4870 if (locals->VRatioPreY[i][j][k] > 4.0
4871 || locals->VRatioPreC[i][j][k] > 4.0
4872 || mode_lib->vba.IsErrorResult[i][j][k] == true) {
4873 locals->VRatioInPrefetchSupported[i][j] = false;
4874 }
4875 }
4876 mode_lib->vba.AnyLinesForVMOrRowTooLarge = false;
4877 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4878 if (locals->LinesForMetaAndDPTERow[k] >= 16 || locals->LinesForMetaPTE[k] >= 32) {
4879 mode_lib->vba.AnyLinesForVMOrRowTooLarge = true;
4880 }
4881 }
4882
4883 if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
4884 mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
4885 mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1;
4886 } else {
4887 mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
4888 }
4889 } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
4890 && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0]
4891 || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode));
4892
4893 if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
4894 mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0];
4895 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4896 mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip
4897 - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
4898 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
4899 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
4900 locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
4901 + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
4902 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
4903 }
4904 mode_lib->vba.TotImmediateFlipBytes = 0.0;
4905 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4906 mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
4907 + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k];
4908 }
4909
4910 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4911 CalculateFlipSchedule(
4912 mode_lib,
4913 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4914 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4915 mode_lib->vba.ExtraLatency,
4916 mode_lib->vba.UrgentLatency,
4917 mode_lib->vba.GPUVMMaxPageTableLevels,
4918 mode_lib->vba.HostVMEnable,
4919 mode_lib->vba.HostVMMaxPageTableLevels,
4920 mode_lib->vba.HostVMCachedPageTableLevels,
4921 mode_lib->vba.GPUVMEnable,
4922 locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
4923 locals->MetaRowBytes[0][0][k],
4924 locals->DPTEBytesPerRow[0][0][k],
4925 mode_lib->vba.BandwidthAvailableForImmediateFlip,
4926 mode_lib->vba.TotImmediateFlipBytes,
4927 mode_lib->vba.SourcePixelFormat[k],
4928 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4929 mode_lib->vba.VRatio[k],
4930 locals->Tno_bw[k],
4931 mode_lib->vba.DCCEnable[k],
4932 locals->dpte_row_height[k],
4933 locals->meta_row_height[k],
4934 locals->dpte_row_height_chroma[k],
4935 locals->meta_row_height_chroma[k],
4936 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
4937 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
4938 &locals->final_flip_bw[k],
4939 &locals->ImmediateFlipSupportedForPipe[k]);
4940 }
4941 mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
4942 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4943 mode_lib->vba.total_dcn_read_bw_with_flip = mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
4944 locals->prefetch_vmrow_bw[k],
4945 locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
4946 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
4947 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
4948 locals->final_flip_bw[k] + locals->RequiredPrefetchPixelDataBWLuma[i][j][k]
4949 * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixelDataBWChroma[i][j][k]
4950 * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k]
4951 * locals->UrgentBurstFactorCursorPre[k]);
4952 }
4953 locals->ImmediateFlipSupportedForState[i][j] = true;
4954 if (mode_lib->vba.total_dcn_read_bw_with_flip
4955 > locals->ReturnBWPerState[i][0]) {
4956 locals->ImmediateFlipSupportedForState[i][j] = false;
4957 }
4958 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4959 if (locals->ImmediateFlipSupportedForPipe[k] == false) {
4960 locals->ImmediateFlipSupportedForState[i][j] = false;
4961 }
4962 }
4963 } else {
4964 locals->ImmediateFlipSupportedForState[i][j] = false;
4965 }
4966 mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
4967 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4968 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4969 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
4970 CalculateWatermarksAndDRAMSpeedChangeSupport(
4971 mode_lib,
4972 mode_lib->vba.PrefetchMode[i][j],
4973 mode_lib->vba.NumberOfActivePlanes,
4974 mode_lib->vba.MaxLineBufferLines,
4975 mode_lib->vba.LineBufferSize,
4976 mode_lib->vba.DPPOutputBufferPixels,
4977 mode_lib->vba.DETBufferSizeInKByte,
4978 mode_lib->vba.WritebackInterfaceLumaBufferSize,
4979 mode_lib->vba.WritebackInterfaceChromaBufferSize,
4980 mode_lib->vba.DCFCLKPerState[i],
4981 mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
4982 locals->ReturnBWPerState[i][0],
4983 mode_lib->vba.GPUVMEnable,
4984 locals->dpte_group_bytes,
4985 mode_lib->vba.MetaChunkSize,
4986 mode_lib->vba.UrgentLatency,
4987 mode_lib->vba.ExtraLatency,
4988 mode_lib->vba.WritebackLatency,
4989 mode_lib->vba.WritebackChunkSize,
4990 mode_lib->vba.SOCCLKPerState[i],
4991 mode_lib->vba.DRAMClockChangeLatency,
4992 mode_lib->vba.SRExitTime,
4993 mode_lib->vba.SREnterPlusExitTime,
4994 mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
4995 locals->NoOfDPPThisState,
4996 mode_lib->vba.DCCEnable,
4997 locals->RequiredDPPCLKThisState,
4998 locals->SwathWidthYSingleDPP,
4999 locals->SwathHeightYThisState,
5000 locals->ReadBandwidthLuma,
5001 locals->SwathHeightCThisState,
5002 locals->ReadBandwidthChroma,
5003 mode_lib->vba.LBBitPerPixel,
5004 locals->SwathWidthYThisState,
5005 mode_lib->vba.HRatio,
5006 mode_lib->vba.vtaps,
5007 mode_lib->vba.VTAPsChroma,
5008 mode_lib->vba.VRatio,
5009 mode_lib->vba.HTotal,
5010 mode_lib->vba.PixelClock,
5011 mode_lib->vba.BlendingAndTiming,
5012 locals->BytePerPixelInDETY,
5013 locals->BytePerPixelInDETC,
5014 mode_lib->vba.WritebackEnable,
5015 mode_lib->vba.WritebackPixelFormat,
5016 mode_lib->vba.WritebackDestinationWidth,
5017 mode_lib->vba.WritebackDestinationHeight,
5018 mode_lib->vba.WritebackSourceHeight,
5019 &locals->DRAMClockChangeSupport[i][j],
5020 &mode_lib->vba.UrgentWatermark,
5021 &mode_lib->vba.WritebackUrgentWatermark,
5022 &mode_lib->vba.DRAMClockChangeWatermark,
5023 &mode_lib->vba.WritebackDRAMClockChangeWatermark,
5024 &mode_lib->vba.StutterExitWatermark,
5025 &mode_lib->vba.StutterEnterPlusExitWatermark,
5026 &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
5027 }
5028 }
5029
5030 /*Vertical Active BW support*/
5031 {
5032 double MaxTotalVActiveRDBandwidth = 0.0;
5033 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
5034 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k];
5035 }
5036 for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
5037 locals->MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(
5038 locals->IdealSDPPortBandwidthPerState[i][0] *
5039 mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation
5040 / 100.0, mode_lib->vba.DRAMSpeedPerState[i] *
5041 mode_lib->vba.NumberOfChannels *
5042 mode_lib->vba.DRAMChannelWidth *
5043 mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
5044 / 100.0);
5045
5046 if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i][0]) {
5047 locals->TotalVerticalActiveBandwidthSupport[i][0] = true;
5048 } else {
5049 locals->TotalVerticalActiveBandwidthSupport[i][0] = false;
5050 }
5051 }
5052 }
5053
5054 /*PTE Buffer Size Check*/
5055
5056 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
5057 for (j = 0; j < 2; j++) {
5058 locals->PTEBufferSizeNotExceeded[i][j] = true;
5059 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5060 if (locals->PTEBufferSizeNotExceededY[i][j][k] == false
5061 || locals->PTEBufferSizeNotExceededC[i][j][k] == false) {
5062 locals->PTEBufferSizeNotExceeded[i][j] = false;
5063 }
5064 }
5065 }
5066 }
5067 /*Cursor Support Check*/
5068
5069 mode_lib->vba.CursorSupport = true;
5070 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5071 if (mode_lib->vba.CursorWidth[k][0] > 0.0) {
5072 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
5073 if (mode_lib->vba.CursorBPP[k][m] == 64 && mode_lib->vba.Cursor64BppSupport == false) {
5074 mode_lib->vba.CursorSupport = false;
5075 }
5076 }
5077 }
5078 }
5079 /*Valid Pitch Check*/
5080
5081 mode_lib->vba.PitchSupport = true;
5082 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5083 locals->AlignedYPitch[k] = dml_ceil(
5084 dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]),
5085 locals->MacroTileWidthY[k]);
5086 if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) {
5087 mode_lib->vba.PitchSupport = false;
5088 }
5089 if (mode_lib->vba.DCCEnable[k] == true) {
5090 locals->AlignedDCCMetaPitch[k] = dml_ceil(
5091 dml_max(
5092 mode_lib->vba.DCCMetaPitchY[k],
5093 mode_lib->vba.ViewportWidth[k]),
5094 64.0 * locals->Read256BlockWidthY[k]);
5095 } else {
5096 locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k];
5097 }
5098 if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) {
5099 mode_lib->vba.PitchSupport = false;
5100 }
5101 if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
5102 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
5103 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
5104 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
5105 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) {
5106 locals->AlignedCPitch[k] = dml_ceil(
5107 dml_max(
5108 mode_lib->vba.PitchC[k],
5109 mode_lib->vba.ViewportWidth[k] / 2.0),
5110 locals->MacroTileWidthC[k]);
5111 } else {
5112 locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k];
5113 }
5114 if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) {
5115 mode_lib->vba.PitchSupport = false;
5116 }
5117 }
5118 /*Mode Support, Voltage State and SOC Configuration*/
5119
5120 for (i = mode_lib->vba.soc.num_states; i >= 0; i--) {
5121 for (j = 0; j < 2; j++) {
5122 enum dm_validation_status status = DML_VALIDATION_OK;
5123
5124 if (mode_lib->vba.ScaleRatioAndTapsSupport != true) {
5125 status = DML_FAIL_SCALE_RATIO_TAP;
5126 } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) {
5127 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5128 } else if (locals->ViewportSizeSupport[i][0] != true) {
5129 status = DML_FAIL_VIEWPORT_SIZE;
5130 } else if (locals->DIOSupport[i] != true) {
5131 status = DML_FAIL_DIO_SUPPORT;
5132 } else if (locals->NotEnoughDSCUnits[i] != false) {
5133 status = DML_FAIL_NOT_ENOUGH_DSC;
5134 } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) {
5135 status = DML_FAIL_DSC_CLK_REQUIRED;
5136 } else if (locals->ROBSupport[i][0] != true) {
5137 status = DML_FAIL_REORDERING_BUFFER;
5138 } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) {
5139 status = DML_FAIL_DISPCLK_DPPCLK;
5140 } else if (locals->TotalAvailablePipesSupport[i][j] != true) {
5141 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5142 } else if (mode_lib->vba.NumberOfOTGSupport != true) {
5143 status = DML_FAIL_NUM_OTG;
5144 } else if (mode_lib->vba.WritebackModeSupport != true) {
5145 status = DML_FAIL_WRITEBACK_MODE;
5146 } else if (mode_lib->vba.WritebackLatencySupport != true) {
5147 status = DML_FAIL_WRITEBACK_LATENCY;
5148 } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) {
5149 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5150 } else if (mode_lib->vba.CursorSupport != true) {
5151 status = DML_FAIL_CURSOR_SUPPORT;
5152 } else if (mode_lib->vba.PitchSupport != true) {
5153 status = DML_FAIL_PITCH_SUPPORT;
5154 } else if (locals->TotalVerticalActiveBandwidthSupport[i][0] != true) {
5155 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5156 } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) {
5157 status = DML_FAIL_PTE_BUFFER_SIZE;
5158 } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) {
5159 status = DML_FAIL_DSC_INPUT_BPC;
5160 } else if ((mode_lib->vba.HostVMEnable != false
5161 && locals->ImmediateFlipSupportedForState[i][j] != true)) {
5162 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5163 } else if (locals->PrefetchSupported[i][j] != true) {
5164 status = DML_FAIL_PREFETCH_SUPPORT;
5165 } else if (locals->VRatioInPrefetchSupported[i][j] != true) {
5166 status = DML_FAIL_V_RATIO_PREFETCH;
5167 }
5168
5169 if (status == DML_VALIDATION_OK) {
5170 locals->ModeSupport[i][j] = true;
5171 } else {
5172 locals->ModeSupport[i][j] = false;
5173 }
5174 locals->ValidationStatus[i] = status;
5175 }
5176 }
5177 {
5178 unsigned int MaximumMPCCombine = 0;
5179 mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1;
5180 for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) {
5181 if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) {
5182 mode_lib->vba.VoltageLevel = i;
5183 if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false
5184 || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible
5185 || (mode_lib->vba.WhenToDoMPCCombine == dm_mpc_reduce_voltage_and_clocks
5186 && ((locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vactive
5187 && locals->DRAMClockChangeSupport[i][0] != dm_dram_clock_change_vactive)
5188 || (locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vblank
5189 && locals->DRAMClockChangeSupport[i][0] == dm_dram_clock_change_unsupported))))) {
5190 MaximumMPCCombine = 1;
5191 } else {
5192 MaximumMPCCombine = 0;
5193 }
5194 break;
5195 }
5196 }
5197 mode_lib->vba.ImmediateFlipSupport =
5198 locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
5199 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5200 mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
5201 locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
5202 }
5203 mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
5204 mode_lib->vba.maxMpcComb = MaximumMPCCombine;
5205 }
5206 mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel];
5207 mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
5208 mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
5209 mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
5210 mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
5211 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5212 if (mode_lib->vba.BlendingAndTiming[k] == k) {
5213 mode_lib->vba.ODMCombineEnabled[k] =
5214 locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
5215 } else {
5216 mode_lib->vba.ODMCombineEnabled[k] = dm_odm_combine_mode_disabled;
5217 }
5218 mode_lib->vba.DSCEnabled[k] =
5219 locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
5220 mode_lib->vba.OutputBpp[k] =
5221 locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k];
5222 }
5223 }
5224
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,double DETBufferSizeInKByte,unsigned int WritebackInterfaceLumaBufferSize,unsigned int WritebackInterfaceChromaBufferSize,double DCFCLK,double UrgentOutOfOrderReturn,double ReturnBW,bool GPUVMEnable,int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],double SwathWidthSingleDPPY[],unsigned int SwathHeightY[],double ReadBandwidthPlaneLuma[],unsigned int SwathHeightC[],double ReadBandwidthPlaneChroma[],unsigned int LBBitPerPixel[],double SwathWidthY[],double HRatio[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5225 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5226 struct display_mode_lib *mode_lib,
5227 unsigned int PrefetchMode,
5228 unsigned int NumberOfActivePlanes,
5229 unsigned int MaxLineBufferLines,
5230 unsigned int LineBufferSize,
5231 unsigned int DPPOutputBufferPixels,
5232 double DETBufferSizeInKByte,
5233 unsigned int WritebackInterfaceLumaBufferSize,
5234 unsigned int WritebackInterfaceChromaBufferSize,
5235 double DCFCLK,
5236 double UrgentOutOfOrderReturn,
5237 double ReturnBW,
5238 bool GPUVMEnable,
5239 int dpte_group_bytes[],
5240 unsigned int MetaChunkSize,
5241 double UrgentLatency,
5242 double ExtraLatency,
5243 double WritebackLatency,
5244 double WritebackChunkSize,
5245 double SOCCLK,
5246 double DRAMClockChangeLatency,
5247 double SRExitTime,
5248 double SREnterPlusExitTime,
5249 double DCFCLKDeepSleep,
5250 int DPPPerPlane[],
5251 bool DCCEnable[],
5252 double DPPCLK[],
5253 double SwathWidthSingleDPPY[],
5254 unsigned int SwathHeightY[],
5255 double ReadBandwidthPlaneLuma[],
5256 unsigned int SwathHeightC[],
5257 double ReadBandwidthPlaneChroma[],
5258 unsigned int LBBitPerPixel[],
5259 double SwathWidthY[],
5260 double HRatio[],
5261 unsigned int vtaps[],
5262 unsigned int VTAPsChroma[],
5263 double VRatio[],
5264 unsigned int HTotal[],
5265 double PixelClock[],
5266 unsigned int BlendingAndTiming[],
5267 double BytePerPixelDETY[],
5268 double BytePerPixelDETC[],
5269 bool WritebackEnable[],
5270 enum source_format_class WritebackPixelFormat[],
5271 double WritebackDestinationWidth[],
5272 double WritebackDestinationHeight[],
5273 double WritebackSourceHeight[],
5274 enum clock_change_support *DRAMClockChangeSupport,
5275 double *UrgentWatermark,
5276 double *WritebackUrgentWatermark,
5277 double *DRAMClockChangeWatermark,
5278 double *WritebackDRAMClockChangeWatermark,
5279 double *StutterExitWatermark,
5280 double *StutterEnterPlusExitWatermark,
5281 double *MinActiveDRAMClockChangeLatencySupported)
5282 {
5283 double EffectiveLBLatencyHidingY;
5284 double EffectiveLBLatencyHidingC;
5285 double DPPOutputBufferLinesY;
5286 double DPPOutputBufferLinesC;
5287 double DETBufferSizeY;
5288 double DETBufferSizeC;
5289 double LinesInDETY[DC__NUM_DPP__MAX];
5290 double LinesInDETC;
5291 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5292 unsigned int LinesInDETCRoundedDownToSwath;
5293 double FullDETBufferingTimeY[DC__NUM_DPP__MAX];
5294 double FullDETBufferingTimeC;
5295 double ActiveDRAMClockChangeLatencyMarginY;
5296 double ActiveDRAMClockChangeLatencyMarginC;
5297 double WritebackDRAMClockChangeLatencyMargin;
5298 double PlaneWithMinActiveDRAMClockChangeMargin;
5299 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5300 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5301 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5302 unsigned int k, j;
5303
5304 mode_lib->vba.TotalActiveDPP = 0;
5305 mode_lib->vba.TotalDCCActiveDPP = 0;
5306 for (k = 0; k < NumberOfActivePlanes; ++k) {
5307 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5308 if (DCCEnable[k] == true) {
5309 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5310 }
5311 }
5312
5313 mode_lib->vba.TotalDataReadBandwidth = 0;
5314 for (k = 0; k < NumberOfActivePlanes; ++k) {
5315 mode_lib->vba.TotalDataReadBandwidth = mode_lib->vba.TotalDataReadBandwidth
5316 + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
5317 }
5318
5319 *UrgentWatermark = UrgentLatency + ExtraLatency;
5320
5321 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5322
5323 mode_lib->vba.TotalActiveWriteback = 0;
5324 for (k = 0; k < NumberOfActivePlanes; ++k) {
5325 if (WritebackEnable[k] == true) {
5326 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5327 }
5328 }
5329
5330 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5331 *WritebackUrgentWatermark = WritebackLatency;
5332 } else {
5333 *WritebackUrgentWatermark = WritebackLatency
5334 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5335 }
5336
5337 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5338 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5339 } else {
5340 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency
5341 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5342 }
5343
5344 for (k = 0; k < NumberOfActivePlanes; ++k) {
5345
5346 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines,
5347 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1))
5348 - (vtaps[k] - 1);
5349
5350 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines,
5351 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / 2 / dml_max(HRatio[k] / 2, 1.0)), 1))
5352 - (VTAPsChroma[k] - 1);
5353
5354 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k]
5355 * (HTotal[k] / PixelClock[k]);
5356
5357 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC
5358 / (VRatio[k] / 2) * (HTotal[k] / PixelClock[k]);
5359
5360 if (SwathWidthY[k] > 2 * DPPOutputBufferPixels) {
5361 DPPOutputBufferLinesY = (double) DPPOutputBufferPixels / SwathWidthY[k];
5362 } else if (SwathWidthY[k] > DPPOutputBufferPixels) {
5363 DPPOutputBufferLinesY = 0.5;
5364 } else {
5365 DPPOutputBufferLinesY = 1;
5366 }
5367
5368 if (SwathWidthY[k] / 2.0 > 2 * DPPOutputBufferPixels) {
5369 DPPOutputBufferLinesC = (double) DPPOutputBufferPixels
5370 / (SwathWidthY[k] / 2.0);
5371 } else if (SwathWidthY[k] / 2.0 > DPPOutputBufferPixels) {
5372 DPPOutputBufferLinesC = 0.5;
5373 } else {
5374 DPPOutputBufferLinesC = 1;
5375 }
5376
5377 CalculateDETBufferSize(
5378 DETBufferSizeInKByte,
5379 SwathHeightY[k],
5380 SwathHeightC[k],
5381 &DETBufferSizeY,
5382 &DETBufferSizeC);
5383
5384 LinesInDETY[k] = DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5385 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5386 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k]
5387 * (HTotal[k] / PixelClock[k]) / VRatio[k];
5388 if (BytePerPixelDETC[k] > 0) {
5389 LinesInDETC = DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0);
5390 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5391 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath
5392 * (HTotal[k] / PixelClock[k]) / (VRatio[k] / 2);
5393 } else {
5394 LinesInDETC = 0;
5395 FullDETBufferingTimeC = 999999;
5396 }
5397
5398 ActiveDRAMClockChangeLatencyMarginY = HTotal[k] / PixelClock[k]
5399 * DPPOutputBufferLinesY + EffectiveLBLatencyHidingY
5400 + FullDETBufferingTimeY[k] - *DRAMClockChangeWatermark;
5401
5402 if (NumberOfActivePlanes > 1) {
5403 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5404 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5405 }
5406
5407 if (BytePerPixelDETC[k] > 0) {
5408 ActiveDRAMClockChangeLatencyMarginC = HTotal[k] / PixelClock[k]
5409 * DPPOutputBufferLinesC + EffectiveLBLatencyHidingC
5410 + FullDETBufferingTimeC - *DRAMClockChangeWatermark;
5411 if (NumberOfActivePlanes > 1) {
5412 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5413 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / (VRatio[k] / 2);
5414 }
5415 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
5416 ActiveDRAMClockChangeLatencyMarginY,
5417 ActiveDRAMClockChangeLatencyMarginC);
5418 } else {
5419 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5420 }
5421
5422 if (WritebackEnable[k] == true) {
5423 if (WritebackPixelFormat[k] == dm_444_32) {
5424 WritebackDRAMClockChangeLatencyMargin = (WritebackInterfaceLumaBufferSize
5425 + WritebackInterfaceChromaBufferSize) / (WritebackDestinationWidth[k]
5426 * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k]
5427 / PixelClock[k]) * 4) - *WritebackDRAMClockChangeWatermark;
5428 } else {
5429 WritebackDRAMClockChangeLatencyMargin = dml_min(
5430 WritebackInterfaceLumaBufferSize * 8.0 / 10,
5431 2 * WritebackInterfaceChromaBufferSize * 8.0 / 10) / (WritebackDestinationWidth[k]
5432 * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]))
5433 - *WritebackDRAMClockChangeWatermark;
5434 }
5435 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
5436 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k],
5437 WritebackDRAMClockChangeLatencyMargin);
5438 }
5439 }
5440
5441 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5442 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5443 for (k = 0; k < NumberOfActivePlanes; ++k) {
5444 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
5445 < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5446 mode_lib->vba.MinActiveDRAMClockChangeMargin =
5447 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5448 if (BlendingAndTiming[k] == k) {
5449 PlaneWithMinActiveDRAMClockChangeMargin = k;
5450 } else {
5451 for (j = 0; j < NumberOfActivePlanes; ++j) {
5452 if (BlendingAndTiming[k] == j) {
5453 PlaneWithMinActiveDRAMClockChangeMargin = j;
5454 }
5455 }
5456 }
5457 }
5458 }
5459
5460 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5461
5462 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5463 for (k = 0; k < NumberOfActivePlanes; ++k) {
5464 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k))
5465 && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5466 && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
5467 < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5468 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank =
5469 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5470 }
5471 }
5472
5473 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5474 for (k = 0; k < NumberOfActivePlanes; ++k) {
5475 if (BlendingAndTiming[k] == k) {
5476 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5477 }
5478 }
5479
5480 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5481 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5482 } else if (((mode_lib->vba.SynchronizedVBlank == true
5483 || mode_lib->vba.TotalNumberOfActiveOTG == 1
5484 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0)
5485 && PrefetchMode == 0)) {
5486 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5487 } else {
5488 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5489 }
5490
5491 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5492 for (k = 0; k < NumberOfActivePlanes; ++k) {
5493 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5494 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k]
5495 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]))
5496 * (HTotal[k] / PixelClock[k]) / VRatio[k];
5497 }
5498 }
5499
5500 *StutterExitWatermark = SRExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
5501 + ExtraLatency + 10 / DCFCLKDeepSleep;
5502 *StutterEnterPlusExitWatermark = dml_max(
5503 SREnterPlusExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
5504 + ExtraLatency + 10 / DCFCLKDeepSleep,
5505 TimeToFinishSwathTransferStutterCriticalPlane);
5506
5507 }
5508
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,double BytePerPixelDETY[],double BytePerPixelDETC[],double VRatio[],double SwathWidthY[],int DPPPerPlane[],double HRatio[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double * DCFCLKDeepSleep)5509 static void CalculateDCFCLKDeepSleep(
5510 struct display_mode_lib *mode_lib,
5511 unsigned int NumberOfActivePlanes,
5512 double BytePerPixelDETY[],
5513 double BytePerPixelDETC[],
5514 double VRatio[],
5515 double SwathWidthY[],
5516 int DPPPerPlane[],
5517 double HRatio[],
5518 double PixelClock[],
5519 double PSCL_THROUGHPUT[],
5520 double PSCL_THROUGHPUT_CHROMA[],
5521 double DPPCLK[],
5522 double *DCFCLKDeepSleep)
5523 {
5524 unsigned int k;
5525 double DisplayPipeLineDeliveryTimeLuma;
5526 double DisplayPipeLineDeliveryTimeChroma;
5527 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5528
5529 for (k = 0; k < NumberOfActivePlanes; ++k) {
5530 if (VRatio[k] <= 1) {
5531 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k]
5532 / HRatio[k] / PixelClock[k];
5533 } else {
5534 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k]
5535 / DPPCLK[k];
5536 }
5537 if (BytePerPixelDETC[k] == 0) {
5538 DisplayPipeLineDeliveryTimeChroma = 0;
5539 } else {
5540 if (VRatio[k] / 2 <= 1) {
5541 DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
5542 * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k];
5543 } else {
5544 DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
5545 / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5546 }
5547 }
5548
5549 if (BytePerPixelDETC[k] > 0) {
5550 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
5551 1.1 * SwathWidthY[k] * dml_ceil(BytePerPixelDETY[k], 1)
5552 / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5553 1.1 * SwathWidthY[k] / 2.0
5554 * dml_ceil(BytePerPixelDETC[k], 2) / 32.0
5555 / DisplayPipeLineDeliveryTimeChroma);
5556 } else {
5557 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k]
5558 * dml_ceil(BytePerPixelDETY[k], 1) / 64.0
5559 / DisplayPipeLineDeliveryTimeLuma;
5560 }
5561 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
5562 mode_lib->vba.DCFCLKDeepSleepPerPlane[k],
5563 PixelClock[k] / 16);
5564
5565 }
5566
5567 *DCFCLKDeepSleep = 8;
5568 for (k = 0; k < NumberOfActivePlanes; ++k) {
5569 *DCFCLKDeepSleep = dml_max(
5570 *DCFCLKDeepSleep,
5571 mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5572 }
5573 }
5574
/*
 * CalculateDETBufferSize() - split the DET buffer between luma and chroma.
 *
 * DETBufferSizeInKByte is multiplied by 1024 and distributed as follows:
 *  - SwathHeightC == 0:            everything to Y, nothing to C;
 *  - SwathHeightY <= SwathHeightC: one half each;
 *  - otherwise:                    two thirds to Y, one third to C.
 *
 * The results are written through DETBufferSizeY and DETBufferSizeC.
 */
static void CalculateDETBufferSize(
		double DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double *DETBufferSizeY,
		double *DETBufferSizeC)
{
	double TotalBytes = DETBufferSizeInKByte * 1024;

	if (SwathHeightC == 0) {
		*DETBufferSizeY = TotalBytes;
		*DETBufferSizeC = 0;
		return;
	}

	if (SwathHeightY <= SwathHeightC) {
		*DETBufferSizeY = TotalBytes / 2;
		*DETBufferSizeC = TotalBytes / 2;
		return;
	}

	*DETBufferSizeY = TotalBytes * 2 / 3;
	*DETBufferSizeC = TotalBytes / 3;
}
5593
/*
 * Turn a buffer size expressed in time into a burst factor.
 *
 * When BufferSizeInTime - UrgentLatency <= 0 the flag behind
 * NotEnoughUrgentLatencyHiding is set to 1 and the factor to 0; otherwise the
 * factor is BufferSizeInTime / (BufferSizeInTime - UrgentLatency) and the
 * flag is left alone.
 */
static void UrgentBurstFactorFromBufferTime(
		double BufferSizeInTime,
		double UrgentLatency,
		double *UrgentBurstFactor,
		unsigned int *NotEnoughUrgentLatencyHiding)
{
	if (BufferSizeInTime - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = 1;
		*UrgentBurstFactor = 0;
	} else {
		*UrgentBurstFactor = BufferSizeInTime
				/ (BufferSizeInTime - UrgentLatency);
	}
}

/*
 * CalculateUrgentBurstFactor() - burst factors for cursor, luma and chroma.
 *
 * Both *NotEnoughUrgentLatencyHiding and *NotEnoughUrgentLatencyHidingPre are
 * cleared first and set to 1 as soon as any of the corresponding buffer times
 * does not exceed UrgentLatency.
 *
 * Each "Pre" factor is evaluated with the prefetch ratio (VRatioPreY for
 * cursor and luma, VRatioPreC for chroma) and is forced to 1 when that ratio
 * is not greater than 0.
 *
 * Outputs that are NOT written:
 *  - the two cursor factors when CursorWidth == 0;
 *  - the two chroma factors when BytePerPixelInDETC is not greater than 0.
 * Callers get back whatever those locations held before the call.
 */
static void CalculateUrgentBurstFactor(
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		unsigned int SwathWidthY,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioPreY,
		double VRatioPreC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorCursorPre,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorLumaPre,
		double *UrgentBurstFactorChroma,
		double *UrgentBurstFactorChromaPre,
		unsigned int *NotEnoughUrgentLatencyHiding,
		unsigned int *NotEnoughUrgentLatencyHidingPre)
{
	double DETBufferSizeY;
	double DETBufferSizeC;
	double LinesInDETLuma;
	double LinesInDETChroma;
	double BufferedLinesTime;
	unsigned int LinesInCursorBuffer;

	*NotEnoughUrgentLatencyHiding = 0;
	*NotEnoughUrgentLatencyHidingPre = 0;

	/* Cursor */
	if (CursorWidth > 0) {
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(
				dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		BufferedLinesTime = LinesInCursorBuffer * LineTime;

		UrgentBurstFactorFromBufferTime(
				BufferedLinesTime / VRatio,
				UrgentLatency,
				UrgentBurstFactorCursor,
				NotEnoughUrgentLatencyHiding);

		if (VRatioPreY > 0) {
			UrgentBurstFactorFromBufferTime(
					BufferedLinesTime / VRatioPreY,
					UrgentLatency,
					UrgentBurstFactorCursorPre,
					NotEnoughUrgentLatencyHidingPre);
		} else {
			*UrgentBurstFactorCursorPre = 1;
		}
	}

	CalculateDETBufferSize(
			DETBufferSizeInKByte,
			SwathHeightY,
			SwathHeightC,
			&DETBufferSizeY,
			&DETBufferSizeC);

	/* Luma: only whole swaths held in the DET count towards the time. */
	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / SwathWidthY;
	BufferedLinesTime = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime;

	UrgentBurstFactorFromBufferTime(
			BufferedLinesTime / VRatio,
			UrgentLatency,
			UrgentBurstFactorLuma,
			NotEnoughUrgentLatencyHiding);

	if (VRatioPreY > 0) {
		UrgentBurstFactorFromBufferTime(
				BufferedLinesTime / VRatioPreY,
				UrgentLatency,
				UrgentBurstFactorLumaPre,
				NotEnoughUrgentLatencyHidingPre);
	} else {
		*UrgentBurstFactorLumaPre = 1;
	}

	/* Chroma is evaluated only when the plane carries chroma bytes. */
	if (!(BytePerPixelInDETC > 0)) {
		return;
	}

	/*
	 * NOTE(review): SwathWidthY is an unsigned int, so SwathWidthY / 2 is an
	 * integer division here; kept exactly as in the HW-provided formula.
	 */
	LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / (SwathWidthY / 2);
	BufferedLinesTime = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime;

	UrgentBurstFactorFromBufferTime(
			BufferedLinesTime / (VRatio / 2),
			UrgentLatency,
			UrgentBurstFactorChroma,
			NotEnoughUrgentLatencyHiding);

	if (VRatioPreC > 0) {
		UrgentBurstFactorFromBufferTime(
				BufferedLinesTime / VRatioPreC,
				UrgentLatency,
				UrgentBurstFactorChromaPre,
				NotEnoughUrgentLatencyHidingPre);
	} else {
		*UrgentBurstFactorChromaPre = 1;
	}
}
5714
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],int DPPPerPlane[],double HRatio[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double BytePerPixelDETC[],enum scan_direction_class SourceScan[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])5715 static void CalculatePixelDeliveryTimes(
5716 unsigned int NumberOfActivePlanes,
5717 double VRatio[],
5718 double VRatioPrefetchY[],
5719 double VRatioPrefetchC[],
5720 unsigned int swath_width_luma_ub[],
5721 unsigned int swath_width_chroma_ub[],
5722 int DPPPerPlane[],
5723 double HRatio[],
5724 double PixelClock[],
5725 double PSCL_THROUGHPUT[],
5726 double PSCL_THROUGHPUT_CHROMA[],
5727 double DPPCLK[],
5728 double BytePerPixelDETC[],
5729 enum scan_direction_class SourceScan[],
5730 unsigned int BlockWidth256BytesY[],
5731 unsigned int BlockHeight256BytesY[],
5732 unsigned int BlockWidth256BytesC[],
5733 unsigned int BlockHeight256BytesC[],
5734 double DisplayPipeLineDeliveryTimeLuma[],
5735 double DisplayPipeLineDeliveryTimeChroma[],
5736 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5737 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5738 double DisplayPipeRequestDeliveryTimeLuma[],
5739 double DisplayPipeRequestDeliveryTimeChroma[],
5740 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5741 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
5742 {
5743 double req_per_swath_ub;
5744 unsigned int k;
5745
5746 for (k = 0; k < NumberOfActivePlanes; ++k) {
5747 if (VRatio[k] <= 1) {
5748 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k]
5749 / HRatio[k] / PixelClock[k];
5750 } else {
5751 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k]
5752 / PSCL_THROUGHPUT[k] / DPPCLK[k];
5753 }
5754
5755 if (BytePerPixelDETC[k] == 0) {
5756 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5757 } else {
5758 if (VRatio[k] / 2 <= 1) {
5759 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k]
5760 * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k];
5761 } else {
5762 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k]
5763 / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5764 }
5765 }
5766
5767 if (VRatioPrefetchY[k] <= 1) {
5768 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k]
5769 * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5770 } else {
5771 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k]
5772 / PSCL_THROUGHPUT[k] / DPPCLK[k];
5773 }
5774
5775 if (BytePerPixelDETC[k] == 0) {
5776 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5777 } else {
5778 if (VRatioPrefetchC[k] <= 1) {
5779 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
5780 swath_width_chroma_ub[k] * DPPPerPlane[k]
5781 / (HRatio[k] / 2) / PixelClock[k];
5782 } else {
5783 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
5784 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5785 }
5786 }
5787 }
5788
5789 for (k = 0; k < NumberOfActivePlanes; ++k) {
5790 if (SourceScan[k] == dm_horz) {
5791 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5792 } else {
5793 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5794 }
5795 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k]
5796 / req_per_swath_ub;
5797 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
5798 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5799 if (BytePerPixelDETC[k] == 0) {
5800 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5801 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5802 } else {
5803 if (SourceScan[k] == dm_horz) {
5804 req_per_swath_ub = swath_width_chroma_ub[k]
5805 / BlockWidth256BytesC[k];
5806 } else {
5807 req_per_swath_ub = swath_width_chroma_ub[k]
5808 / BlockHeight256BytesC[k];
5809 }
5810 DisplayPipeRequestDeliveryTimeChroma[k] =
5811 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5812 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
5813 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5814 }
5815 }
5816 }
5817
CalculateMetaAndPTETimes(unsigned int NumberOfActivePlanes,bool GPUVMEnable,unsigned int MetaChunkSize,unsigned int MinMetaChunkSizeBytes,unsigned int GPUVMMaxPageTableLevels,unsigned int HTotal[],double VRatio[],double VRatioPrefetchY[],double VRatioPrefetchC[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],double DestinationLinesToRequestVMInVBlank[],double DestinationLinesToRequestVMInImmediateFlip[],bool DCCEnable[],double PixelClock[],double BytePerPixelDETY[],double BytePerPixelDETC[],enum scan_direction_class SourceScan[],unsigned int dpte_row_height[],unsigned int dpte_row_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_height[],unsigned int meta_req_width[],unsigned int meta_req_height[],int dpte_group_bytes[],unsigned int PTERequestSizeY[],unsigned int PTERequestSizeC[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double TimePerMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[],double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])5818 static void CalculateMetaAndPTETimes(
5819 unsigned int NumberOfActivePlanes,
5820 bool GPUVMEnable,
5821 unsigned int MetaChunkSize,
5822 unsigned int MinMetaChunkSizeBytes,
5823 unsigned int GPUVMMaxPageTableLevels,
5824 unsigned int HTotal[],
5825 double VRatio[],
5826 double VRatioPrefetchY[],
5827 double VRatioPrefetchC[],
5828 double DestinationLinesToRequestRowInVBlank[],
5829 double DestinationLinesToRequestRowInImmediateFlip[],
5830 double DestinationLinesToRequestVMInVBlank[],
5831 double DestinationLinesToRequestVMInImmediateFlip[],
5832 bool DCCEnable[],
5833 double PixelClock[],
5834 double BytePerPixelDETY[],
5835 double BytePerPixelDETC[],
5836 enum scan_direction_class SourceScan[],
5837 unsigned int dpte_row_height[],
5838 unsigned int dpte_row_height_chroma[],
5839 unsigned int meta_row_width[],
5840 unsigned int meta_row_height[],
5841 unsigned int meta_req_width[],
5842 unsigned int meta_req_height[],
5843 int dpte_group_bytes[],
5844 unsigned int PTERequestSizeY[],
5845 unsigned int PTERequestSizeC[],
5846 unsigned int PixelPTEReqWidthY[],
5847 unsigned int PixelPTEReqHeightY[],
5848 unsigned int PixelPTEReqWidthC[],
5849 unsigned int PixelPTEReqHeightC[],
5850 unsigned int dpte_row_width_luma_ub[],
5851 unsigned int dpte_row_width_chroma_ub[],
5852 unsigned int vm_group_bytes[],
5853 unsigned int dpde0_bytes_per_frame_ub_l[],
5854 unsigned int dpde0_bytes_per_frame_ub_c[],
5855 unsigned int meta_pte_bytes_per_frame_ub_l[],
5856 unsigned int meta_pte_bytes_per_frame_ub_c[],
5857 double DST_Y_PER_PTE_ROW_NOM_L[],
5858 double DST_Y_PER_PTE_ROW_NOM_C[],
5859 double DST_Y_PER_META_ROW_NOM_L[],
5860 double TimePerMetaChunkNominal[],
5861 double TimePerMetaChunkVBlank[],
5862 double TimePerMetaChunkFlip[],
5863 double time_per_pte_group_nom_luma[],
5864 double time_per_pte_group_vblank_luma[],
5865 double time_per_pte_group_flip_luma[],
5866 double time_per_pte_group_nom_chroma[],
5867 double time_per_pte_group_vblank_chroma[],
5868 double time_per_pte_group_flip_chroma[],
5869 double TimePerVMGroupVBlank[],
5870 double TimePerVMGroupFlip[],
5871 double TimePerVMRequestVBlank[],
5872 double TimePerVMRequestFlip[])
5873 {
5874 unsigned int meta_chunk_width;
5875 unsigned int min_meta_chunk_width;
5876 unsigned int meta_chunk_per_row_int;
5877 unsigned int meta_row_remainder;
5878 unsigned int meta_chunk_threshold;
5879 unsigned int meta_chunks_per_row_ub;
5880 unsigned int dpte_group_width_luma;
5881 unsigned int dpte_group_width_chroma;
5882 unsigned int dpte_groups_per_row_luma_ub;
5883 unsigned int dpte_groups_per_row_chroma_ub;
5884 unsigned int num_group_per_lower_vm_stage;
5885 unsigned int num_req_per_lower_vm_stage;
5886 unsigned int k;
5887
5888 for (k = 0; k < NumberOfActivePlanes; ++k) {
5889 if (GPUVMEnable == true) {
5890 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5891 if (BytePerPixelDETC[k] == 0) {
5892 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5893 } else {
5894 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / (VRatio[k] / 2);
5895 }
5896 } else {
5897 DST_Y_PER_PTE_ROW_NOM_L[k] = 0;
5898 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5899 }
5900 if (DCCEnable[k] == true) {
5901 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5902 } else {
5903 DST_Y_PER_META_ROW_NOM_L[k] = 0;
5904 }
5905 }
5906
5907 for (k = 0; k < NumberOfActivePlanes; ++k) {
5908 if (DCCEnable[k] == true) {
5909 meta_chunk_width = MetaChunkSize * 1024 * 256
5910 / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
5911 min_meta_chunk_width = MinMetaChunkSizeBytes * 256
5912 / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
5913 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5914 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5915 if (SourceScan[k] == dm_horz) {
5916 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5917 } else {
5918 meta_chunk_threshold = 2 * min_meta_chunk_width
5919 - meta_req_height[k];
5920 }
5921 if (meta_row_remainder <= meta_chunk_threshold) {
5922 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5923 } else {
5924 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5925 }
5926 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k]
5927 / PixelClock[k] / meta_chunks_per_row_ub;
5928 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k]
5929 * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5930 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k]
5931 * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5932 } else {
5933 TimePerMetaChunkNominal[k] = 0;
5934 TimePerMetaChunkVBlank[k] = 0;
5935 TimePerMetaChunkFlip[k] = 0;
5936 }
5937 }
5938
5939 for (k = 0; k < NumberOfActivePlanes; ++k) {
5940 if (GPUVMEnable == true) {
5941 if (SourceScan[k] == dm_horz) {
5942 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
5943 * PixelPTEReqWidthY[k];
5944 } else {
5945 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
5946 * PixelPTEReqHeightY[k];
5947 }
5948 dpte_groups_per_row_luma_ub = dml_ceil(
5949 (float) dpte_row_width_luma_ub[k] / dpte_group_width_luma,
5950 1);
5951 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k]
5952 / PixelClock[k] / dpte_groups_per_row_luma_ub;
5953 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k]
5954 * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5955 time_per_pte_group_flip_luma[k] =
5956 DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k]
5957 / PixelClock[k]
5958 / dpte_groups_per_row_luma_ub;
5959 if (BytePerPixelDETC[k] == 0) {
5960 time_per_pte_group_nom_chroma[k] = 0;
5961 time_per_pte_group_vblank_chroma[k] = 0;
5962 time_per_pte_group_flip_chroma[k] = 0;
5963 } else {
5964 if (SourceScan[k] == dm_horz) {
5965 dpte_group_width_chroma = dpte_group_bytes[k]
5966 / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5967 } else {
5968 dpte_group_width_chroma = dpte_group_bytes[k]
5969 / PTERequestSizeC[k]
5970 * PixelPTEReqHeightC[k];
5971 }
5972 dpte_groups_per_row_chroma_ub = dml_ceil(
5973 (float) dpte_row_width_chroma_ub[k]
5974 / dpte_group_width_chroma,
5975 1);
5976 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k]
5977 * HTotal[k] / PixelClock[k]
5978 / dpte_groups_per_row_chroma_ub;
5979 time_per_pte_group_vblank_chroma[k] =
5980 DestinationLinesToRequestRowInVBlank[k] * HTotal[k]
5981 / PixelClock[k]
5982 / dpte_groups_per_row_chroma_ub;
5983 time_per_pte_group_flip_chroma[k] =
5984 DestinationLinesToRequestRowInImmediateFlip[k]
5985 * HTotal[k] / PixelClock[k]
5986 / dpte_groups_per_row_chroma_ub;
5987 }
5988 } else {
5989 time_per_pte_group_nom_luma[k] = 0;
5990 time_per_pte_group_vblank_luma[k] = 0;
5991 time_per_pte_group_flip_luma[k] = 0;
5992 time_per_pte_group_nom_chroma[k] = 0;
5993 time_per_pte_group_vblank_chroma[k] = 0;
5994 time_per_pte_group_flip_chroma[k] = 0;
5995 }
5996 }
5997
5998 for (k = 0; k < NumberOfActivePlanes; ++k) {
5999 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
6000 if (DCCEnable[k] == false) {
6001 if (BytePerPixelDETC[k] > 0) {
6002 num_group_per_lower_vm_stage =
6003 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6004 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6005 } else {
6006 num_group_per_lower_vm_stage =
6007 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6008 }
6009 } else {
6010 if (GPUVMMaxPageTableLevels == 1) {
6011 if (BytePerPixelDETC[k] > 0) {
6012 num_group_per_lower_vm_stage =
6013 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6014 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6015 } else {
6016 num_group_per_lower_vm_stage =
6017 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6018 }
6019 } else {
6020 if (BytePerPixelDETC[k] > 0) {
6021 num_group_per_lower_vm_stage =
6022 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6023 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6024 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6025 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6026 } else {
6027 num_group_per_lower_vm_stage =
6028 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6029 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6030 }
6031 }
6032 }
6033
6034 if (DCCEnable[k] == false) {
6035 if (BytePerPixelDETC[k] > 0) {
6036 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
6037 / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6038 } else {
6039 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
6040 / 64;
6041 }
6042 } else {
6043 if (GPUVMMaxPageTableLevels == 1) {
6044 if (BytePerPixelDETC[k] > 0) {
6045 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64
6046 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6047 } else {
6048 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6049 }
6050 } else {
6051 if (BytePerPixelDETC[k] > 0) {
6052 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6053 + dpde0_bytes_per_frame_ub_c[k] / 64
6054 + meta_pte_bytes_per_frame_ub_l[k] / 64
6055 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6056 } else {
6057 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6058 + meta_pte_bytes_per_frame_ub_l[k] / 64;
6059 }
6060 }
6061 }
6062
6063 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k]
6064 / PixelClock[k] / num_group_per_lower_vm_stage;
6065 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
6066 * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6067 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k]
6068 * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6069 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
6070 * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6071
6072 if (GPUVMMaxPageTableLevels > 2) {
6073 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6074 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6075 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6076 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
6077 }
6078
6079 } else {
6080 TimePerVMGroupVBlank[k] = 0;
6081 TimePerVMGroupFlip[k] = 0;
6082 TimePerVMRequestVBlank[k] = 0;
6083 TimePerVMRequestFlip[k] = 0;
6084 }
6085 }
6086 }
6087
/*
 * CalculateExtraLatency() - extra latency on top of the urgent round trip.
 *
 * The result starts as UrgentRoundTripAndOutOfOrderLatency plus the time
 * ReturnBW needs for (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 * TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0.
 *
 * When GPUVMEnable is set, every plane k below NumberOfActivePlanes adds
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 * * HostVMInefficiencyFactor / ReturnBW, where
 *  - with both GPUVMEnable and HostVMEnable set, HostVMInefficiencyFactor is
 *    the ...PixelMixedWithVMData percentage divided by the ...VMDataOnly
 *    percentage and HostVMDynamicLevels is HostVMMaxPageTableLevels -
 *    HostVMCachedPageTableLevels;
 *  - otherwise they are 1 and 0.
 *
 * Returns the accumulated latency.
 */
static double CalculateExtraLatency(
		double UrgentRoundTripAndOutOfOrderLatency,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		int HostVMMaxPageTableLevels,
		int HostVMCachedPageTableLevels)
{
	double ExtraLatency;
	double HostVMInefficiencyFactor = 1;
	int HostVMDynamicLevels = 0;
	int plane;

	if (GPUVMEnable && HostVMEnable) {
		HostVMInefficiencyFactor =
				PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
						/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
		HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
	}

	ExtraLatency = UrgentRoundTripAndOutOfOrderLatency
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
					+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0
					/ ReturnBW;

	/* Without GPUVM there is no per-plane PTE group contribution. */
	if (!GPUVMEnable) {
		return ExtraLatency;
	}

	for (plane = 0; plane < NumberOfActivePlanes; plane++) {
		ExtraLatency += NumberOfDPP[plane] * dpte_group_bytes[plane]
				* (1 + 8 * HostVMDynamicLevels)
				* HostVMInefficiencyFactor / ReturnBW;
	}

	return ExtraLatency;
}
6136
6137