1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "dc_link.h"
28 #include "../display_mode_lib.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41 #define BPP_INVALID 0
42 #define BPP_BLENDED_PIPE 0xffffffff
43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
45
// For DML-C changes that haven't been propagated to VBA yet
47 //#define __DML_VBA_ALLOW_DELTA__
48
// Move these to IP parameters/constants
50
// The vstartup value at which DML starts checking whether the mode can be supported
52 #define __DML_VBA_MIN_VSTARTUP__ 9
53
54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
56
// fudge factor for min dcfclk calculation
58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
59
/*
 * Per-pipe parameter bundle. Within this file it is the type of the myPipe
 * argument taken by CalculatePrefetchSchedule().
 *
 * NOTE(review): the units and exact meaning of the individual fields are not
 * established by this part of the file. The grouping comments below are
 * inferred from the field names only and should be confirmed against the code
 * that fills the structure.
 */
typedef struct {
	/* Clock-related values (presumably in MHz -- TODO confirm). */
	double DPPCLK;
	double DISPCLK;
	double PixelClock;
	double DCFCLKDeepSleep;
	/* Pipe topology / scaler state. */
	unsigned int DPPPerPlane;
	bool ScalerEnabled;
	enum scan_direction_class SourceScan;
	/* 256-byte block dimensions, one pair for Y and one pair for C. */
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	/* Timing-related values; InterlaceEnable and DCCEnable are stored as integers, not bool. */
	unsigned int InterlaceEnable;
	unsigned int NumberOfCursors;
	unsigned int VBlank;
	unsigned int HTotal;
	unsigned int DCCEnable;
	bool ODMCombineIsEnabled;
	/* Source surface format and its per-plane byte sizes (signed int here). */
	enum source_format_class SourcePixelFormat;
	int BytePerPixelY;
	int BytePerPixelC;
	bool ProgressiveToInterlaceUnitInOPP;
} Pipe;
83
/* BPP_INVALID and BPP_BLENDED_PIPE are defined once, with the other constants near the top of this file. */
86
87 static bool CalculateBytePerPixelAnd256BBlockSizes(
88 enum source_format_class SourcePixelFormat,
89 enum dm_swizzle_mode SurfaceTiling,
90 unsigned int *BytePerPixelY,
91 unsigned int *BytePerPixelC,
92 double *BytePerPixelDETY,
93 double *BytePerPixelDETC,
94 unsigned int *BlockHeight256BytesY,
95 unsigned int *BlockHeight256BytesC,
96 unsigned int *BlockWidth256BytesY,
97 unsigned int *BlockWidth256BytesC);
98 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
99 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
100 static unsigned int dscceComputeDelay(
101 unsigned int bpc,
102 double BPP,
103 unsigned int sliceWidth,
104 unsigned int numSlices,
105 enum output_format_class pixelFormat,
106 enum output_encoder_class Output);
107 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
108 static bool CalculatePrefetchSchedule(
109 struct display_mode_lib *mode_lib,
110 double HostVMInefficiencyFactor,
111 Pipe *myPipe,
112 unsigned int DSCDelay,
113 double DPPCLKDelaySubtotalPlusCNVCFormater,
114 double DPPCLKDelaySCL,
115 double DPPCLKDelaySCLLBOnly,
116 double DPPCLKDelayCNVCCursor,
117 double DISPCLKDelaySubtotal,
118 unsigned int DPP_RECOUT_WIDTH,
119 enum output_format_class OutputFormat,
120 unsigned int MaxInterDCNTileRepeaters,
121 unsigned int VStartup,
122 unsigned int MaxVStartup,
123 unsigned int GPUVMPageTableLevels,
124 bool GPUVMEnable,
125 bool HostVMEnable,
126 unsigned int HostVMMaxNonCachedPageTableLevels,
127 double HostVMMinPageSize,
128 bool DynamicMetadataEnable,
129 bool DynamicMetadataVMEnabled,
130 int DynamicMetadataLinesBeforeActiveRequired,
131 unsigned int DynamicMetadataTransmittedBytes,
132 double UrgentLatency,
133 double UrgentExtraLatency,
134 double TCalc,
135 unsigned int PDEAndMetaPTEBytesFrame,
136 unsigned int MetaRowByte,
137 unsigned int PixelPTEBytesPerRow,
138 double PrefetchSourceLinesY,
139 unsigned int SwathWidthY,
140 double VInitPreFillY,
141 unsigned int MaxNumSwathY,
142 double PrefetchSourceLinesC,
143 unsigned int SwathWidthC,
144 double VInitPreFillC,
145 unsigned int MaxNumSwathC,
146 int swath_width_luma_ub,
147 int swath_width_chroma_ub,
148 unsigned int SwathHeightY,
149 unsigned int SwathHeightC,
150 double TWait,
151 double *DSTXAfterScaler,
152 double *DSTYAfterScaler,
153 double *DestinationLinesForPrefetch,
154 double *PrefetchBandwidth,
155 double *DestinationLinesToRequestVMInVBlank,
156 double *DestinationLinesToRequestRowInVBlank,
157 double *VRatioPrefetchY,
158 double *VRatioPrefetchC,
159 double *RequiredPrefetchPixDataBWLuma,
160 double *RequiredPrefetchPixDataBWChroma,
161 bool *NotEnoughTimeForDynamicMetadata,
162 double *Tno_bw,
163 double *prefetch_vmrow_bw,
164 double *Tdmdl_vm,
165 double *Tdmdl,
166 double *TSetup,
167 int *VUpdateOffsetPix,
168 double *VUpdateWidthPix,
169 double *VReadyOffsetPix);
170 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
171 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
172 static void CalculateDCCConfiguration(
173 bool DCCEnabled,
174 bool DCCProgrammingAssumesScanDirectionUnknown,
175 enum source_format_class SourcePixelFormat,
176 unsigned int SurfaceWidthLuma,
177 unsigned int SurfaceWidthChroma,
178 unsigned int SurfaceHeightLuma,
179 unsigned int SurfaceHeightChroma,
180 double DETBufferSize,
181 unsigned int RequestHeight256ByteLuma,
182 unsigned int RequestHeight256ByteChroma,
183 enum dm_swizzle_mode TilingFormat,
184 unsigned int BytePerPixelY,
185 unsigned int BytePerPixelC,
186 double BytePerPixelDETY,
187 double BytePerPixelDETC,
188 enum scan_direction_class ScanOrientation,
189 unsigned int *MaxUncompressedBlockLuma,
190 unsigned int *MaxUncompressedBlockChroma,
191 unsigned int *MaxCompressedBlockLuma,
192 unsigned int *MaxCompressedBlockChroma,
193 unsigned int *IndependentBlockLuma,
194 unsigned int *IndependentBlockChroma);
195 static double CalculatePrefetchSourceLines(
196 struct display_mode_lib *mode_lib,
197 double VRatio,
198 double vtaps,
199 bool Interlace,
200 bool ProgressiveToInterlaceUnitInOPP,
201 unsigned int SwathHeight,
202 unsigned int ViewportYStart,
203 double *VInitPreFill,
204 unsigned int *MaxNumSwath);
205 static unsigned int CalculateVMAndRowBytes(
206 struct display_mode_lib *mode_lib,
207 bool DCCEnable,
208 unsigned int BlockHeight256Bytes,
209 unsigned int BlockWidth256Bytes,
210 enum source_format_class SourcePixelFormat,
211 unsigned int SurfaceTiling,
212 unsigned int BytePerPixel,
213 enum scan_direction_class ScanDirection,
214 unsigned int SwathWidth,
215 unsigned int ViewportHeight,
216 bool GPUVMEnable,
217 bool HostVMEnable,
218 unsigned int HostVMMaxNonCachedPageTableLevels,
219 unsigned int GPUVMMinPageSize,
220 unsigned int HostVMMinPageSize,
221 unsigned int PTEBufferSizeInRequests,
222 unsigned int Pitch,
223 unsigned int DCCMetaPitch,
224 unsigned int *MacroTileWidth,
225 unsigned int *MetaRowByte,
226 unsigned int *PixelPTEBytesPerRow,
227 bool *PTEBufferSizeNotExceeded,
228 int *dpte_row_width_ub,
229 unsigned int *dpte_row_height,
230 unsigned int *MetaRequestWidth,
231 unsigned int *MetaRequestHeight,
232 unsigned int *meta_row_width,
233 unsigned int *meta_row_height,
234 int *vm_group_bytes,
235 unsigned int *dpte_group_bytes,
236 unsigned int *PixelPTEReqWidth,
237 unsigned int *PixelPTEReqHeight,
238 unsigned int *PTERequestSize,
239 int *DPDE0BytesFrame,
240 int *MetaPTEBytesFrame);
241 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
242 static void CalculateRowBandwidth(
243 bool GPUVMEnable,
244 enum source_format_class SourcePixelFormat,
245 double VRatio,
246 double VRatioChroma,
247 bool DCCEnable,
248 double LineTime,
249 unsigned int MetaRowByteLuma,
250 unsigned int MetaRowByteChroma,
251 unsigned int meta_row_height_luma,
252 unsigned int meta_row_height_chroma,
253 unsigned int PixelPTEBytesPerRowLuma,
254 unsigned int PixelPTEBytesPerRowChroma,
255 unsigned int dpte_row_height_luma,
256 unsigned int dpte_row_height_chroma,
257 double *meta_row_bw,
258 double *dpte_row_bw);
259
260 static void CalculateFlipSchedule(
261 struct display_mode_lib *mode_lib,
262 unsigned int k,
263 double HostVMInefficiencyFactor,
264 double UrgentExtraLatency,
265 double UrgentLatency,
266 double PDEAndMetaPTEBytesPerFrame,
267 double MetaRowBytes,
268 double DPTEBytesPerRow);
269 static double CalculateWriteBackDelay(
270 enum source_format_class WritebackPixelFormat,
271 double WritebackHRatio,
272 double WritebackVRatio,
273 unsigned int WritebackVTaps,
274 int WritebackDestinationWidth,
275 int WritebackDestinationHeight,
276 int WritebackSourceHeight,
277 unsigned int HTotal);
278
279 static void CalculateVupdateAndDynamicMetadataParameters(
280 int MaxInterDCNTileRepeaters,
281 double DPPCLK,
282 double DISPCLK,
283 double DCFClkDeepSleep,
284 double PixelClock,
285 int HTotal,
286 int VBlank,
287 int DynamicMetadataTransmittedBytes,
288 int DynamicMetadataLinesBeforeActiveRequired,
289 int InterlaceEnable,
290 bool ProgressiveToInterlaceUnitInOPP,
291 double *TSetup,
292 double *Tdmbf,
293 double *Tdmec,
294 double *Tdmsks,
295 int *VUpdateOffsetPix,
296 double *VUpdateWidthPix,
297 double *VReadyOffsetPix);
298
/*
 * Forward declaration of CalculateWatermarksAndDRAMSpeedChangeSupport().
 *
 * The scalar and array parameters are passed by value / as arrays; the
 * trailing pointer parameters (DRAMClockChangeSupport and the four stutter
 * watermark values) are presumably the outputs -- confirm against the
 * definition, which is not part of this section of the file.
 */
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
		struct display_mode_lib *mode_lib,
		unsigned int PrefetchMode,
		double DCFCLK,
		double ReturnBW,
		double UrgentLatency,
		double ExtraLatency,
		double SOCCLK,
		double DCFCLKDeepSleep,
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		double SwathWidthY[],
		double SwathWidthC[],
		unsigned int DPPPerPlane[],
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		bool UnboundedRequestEnabled,
		unsigned int CompressedBufferSizeInkByte,
		enum clock_change_support *DRAMClockChangeSupport,
		double *StutterExitWatermark,
		double *StutterEnterPlusExitWatermark,
		double *Z8StutterExitWatermark,
		double *Z8StutterEnterPlusExitWatermark);
324
325 static void CalculateDCFCLKDeepSleep(
326 struct display_mode_lib *mode_lib,
327 unsigned int NumberOfActivePlanes,
328 int BytePerPixelY[],
329 int BytePerPixelC[],
330 double VRatio[],
331 double VRatioChroma[],
332 double SwathWidthY[],
333 double SwathWidthC[],
334 unsigned int DPPPerPlane[],
335 double HRatio[],
336 double HRatioChroma[],
337 double PixelClock[],
338 double PSCL_THROUGHPUT[],
339 double PSCL_THROUGHPUT_CHROMA[],
340 double DPPCLK[],
341 double ReadBandwidthLuma[],
342 double ReadBandwidthChroma[],
343 int ReturnBusWidth,
344 double *DCFCLKDeepSleep);
345
346 static void CalculateUrgentBurstFactor(
347 int swath_width_luma_ub,
348 int swath_width_chroma_ub,
349 unsigned int SwathHeightY,
350 unsigned int SwathHeightC,
351 double LineTime,
352 double UrgentLatency,
353 double CursorBufferSize,
354 unsigned int CursorWidth,
355 unsigned int CursorBPP,
356 double VRatio,
357 double VRatioC,
358 double BytePerPixelInDETY,
359 double BytePerPixelInDETC,
360 double DETBufferSizeY,
361 double DETBufferSizeC,
362 double *UrgentBurstFactorCursor,
363 double *UrgentBurstFactorLuma,
364 double *UrgentBurstFactorChroma,
365 bool *NotEnoughUrgentLatencyHiding);
366
367 static void UseMinimumDCFCLK(
368 struct display_mode_lib *mode_lib,
369 int MaxInterDCNTileRepeaters,
370 int MaxPrefetchMode,
371 double FinalDRAMClockChangeLatency,
372 double SREnterPlusExitTime,
373 int ReturnBusWidth,
374 int RoundTripPingLatencyCycles,
375 int ReorderingBytes,
376 int PixelChunkSizeInKByte,
377 int MetaChunkSize,
378 bool GPUVMEnable,
379 int GPUVMMaxPageTableLevels,
380 bool HostVMEnable,
381 int NumberOfActivePlanes,
382 double HostVMMinPageSize,
383 int HostVMMaxNonCachedPageTableLevels,
384 bool DynamicMetadataVMEnabled,
385 enum immediate_flip_requirement ImmediateFlipRequirement,
386 bool ProgressiveToInterlaceUnitInOPP,
387 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
388 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
389 int VTotal[],
390 int VActive[],
391 int DynamicMetadataTransmittedBytes[],
392 int DynamicMetadataLinesBeforeActiveRequired[],
393 bool Interlace[],
394 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
395 double RequiredDISPCLK[][2],
396 double UrgLatency[],
397 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
398 double ProjectedDCFCLKDeepSleep[][2],
399 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
400 double TotalVActivePixelBandwidth[][2],
401 double TotalVActiveCursorBandwidth[][2],
402 double TotalMetaRowBandwidth[][2],
403 double TotalDPTERowBandwidth[][2],
404 unsigned int TotalNumberOfActiveDPP[][2],
405 unsigned int TotalNumberOfDCCActiveDPP[][2],
406 int dpte_group_bytes[],
407 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
408 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
409 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
410 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
411 int BytePerPixelY[],
412 int BytePerPixelC[],
413 int HTotal[],
414 double PixelClock[],
415 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
416 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
417 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
418 bool DynamicMetadataEnable[],
419 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
420 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
421 double ReadBandwidthLuma[],
422 double ReadBandwidthChroma[],
423 double DCFCLKPerState[],
424 double DCFCLKState[][2]);
425
426 static void CalculatePixelDeliveryTimes(
427 unsigned int NumberOfActivePlanes,
428 double VRatio[],
429 double VRatioChroma[],
430 double VRatioPrefetchY[],
431 double VRatioPrefetchC[],
432 unsigned int swath_width_luma_ub[],
433 unsigned int swath_width_chroma_ub[],
434 unsigned int DPPPerPlane[],
435 double HRatio[],
436 double HRatioChroma[],
437 double PixelClock[],
438 double PSCL_THROUGHPUT[],
439 double PSCL_THROUGHPUT_CHROMA[],
440 double DPPCLK[],
441 int BytePerPixelC[],
442 enum scan_direction_class SourceScan[],
443 unsigned int NumberOfCursors[],
444 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
445 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
446 unsigned int BlockWidth256BytesY[],
447 unsigned int BlockHeight256BytesY[],
448 unsigned int BlockWidth256BytesC[],
449 unsigned int BlockHeight256BytesC[],
450 double DisplayPipeLineDeliveryTimeLuma[],
451 double DisplayPipeLineDeliveryTimeChroma[],
452 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
453 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
454 double DisplayPipeRequestDeliveryTimeLuma[],
455 double DisplayPipeRequestDeliveryTimeChroma[],
456 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
457 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
458 double CursorRequestDeliveryTime[],
459 double CursorRequestDeliveryTimePrefetch[]);
460
461 static void CalculateMetaAndPTETimes(
462 int NumberOfActivePlanes,
463 bool GPUVMEnable,
464 int MetaChunkSize,
465 int MinMetaChunkSizeBytes,
466 int HTotal[],
467 double VRatio[],
468 double VRatioChroma[],
469 double DestinationLinesToRequestRowInVBlank[],
470 double DestinationLinesToRequestRowInImmediateFlip[],
471 bool DCCEnable[],
472 double PixelClock[],
473 int BytePerPixelY[],
474 int BytePerPixelC[],
475 enum scan_direction_class SourceScan[],
476 int dpte_row_height[],
477 int dpte_row_height_chroma[],
478 int meta_row_width[],
479 int meta_row_width_chroma[],
480 int meta_row_height[],
481 int meta_row_height_chroma[],
482 int meta_req_width[],
483 int meta_req_width_chroma[],
484 int meta_req_height[],
485 int meta_req_height_chroma[],
486 int dpte_group_bytes[],
487 int PTERequestSizeY[],
488 int PTERequestSizeC[],
489 int PixelPTEReqWidthY[],
490 int PixelPTEReqHeightY[],
491 int PixelPTEReqWidthC[],
492 int PixelPTEReqHeightC[],
493 int dpte_row_width_luma_ub[],
494 int dpte_row_width_chroma_ub[],
495 double DST_Y_PER_PTE_ROW_NOM_L[],
496 double DST_Y_PER_PTE_ROW_NOM_C[],
497 double DST_Y_PER_META_ROW_NOM_L[],
498 double DST_Y_PER_META_ROW_NOM_C[],
499 double TimePerMetaChunkNominal[],
500 double TimePerChromaMetaChunkNominal[],
501 double TimePerMetaChunkVBlank[],
502 double TimePerChromaMetaChunkVBlank[],
503 double TimePerMetaChunkFlip[],
504 double TimePerChromaMetaChunkFlip[],
505 double time_per_pte_group_nom_luma[],
506 double time_per_pte_group_vblank_luma[],
507 double time_per_pte_group_flip_luma[],
508 double time_per_pte_group_nom_chroma[],
509 double time_per_pte_group_vblank_chroma[],
510 double time_per_pte_group_flip_chroma[]);
511
512 static void CalculateVMGroupAndRequestTimes(
513 unsigned int NumberOfActivePlanes,
514 bool GPUVMEnable,
515 unsigned int GPUVMMaxPageTableLevels,
516 unsigned int HTotal[],
517 int BytePerPixelC[],
518 double DestinationLinesToRequestVMInVBlank[],
519 double DestinationLinesToRequestVMInImmediateFlip[],
520 bool DCCEnable[],
521 double PixelClock[],
522 int dpte_row_width_luma_ub[],
523 int dpte_row_width_chroma_ub[],
524 int vm_group_bytes[],
525 unsigned int dpde0_bytes_per_frame_ub_l[],
526 unsigned int dpde0_bytes_per_frame_ub_c[],
527 int meta_pte_bytes_per_frame_ub_l[],
528 int meta_pte_bytes_per_frame_ub_c[],
529 double TimePerVMGroupVBlank[],
530 double TimePerVMGroupFlip[],
531 double TimePerVMRequestVBlank[],
532 double TimePerVMRequestFlip[]);
533
534 static void CalculateStutterEfficiency(
535 struct display_mode_lib *mode_lib,
536 int CompressedBufferSizeInkByte,
537 bool UnboundedRequestEnabled,
538 int ConfigReturnBufferSizeInKByte,
539 int MetaFIFOSizeInKEntries,
540 int ZeroSizeBufferEntries,
541 int NumberOfActivePlanes,
542 int ROBBufferSizeInKByte,
543 double TotalDataReadBandwidth,
544 double DCFCLK,
545 double ReturnBW,
546 double COMPBUF_RESERVED_SPACE_64B,
547 double COMPBUF_RESERVED_SPACE_ZS,
548 double SRExitTime,
549 double SRExitZ8Time,
550 bool SynchronizedVBlank,
551 double Z8StutterEnterPlusExitWatermark,
552 double StutterEnterPlusExitWatermark,
553 bool ProgressiveToInterlaceUnitInOPP,
554 bool Interlace[],
555 double MinTTUVBlank[],
556 int DPPPerPlane[],
557 unsigned int DETBufferSizeY[],
558 int BytePerPixelY[],
559 double BytePerPixelDETY[],
560 double SwathWidthY[],
561 int SwathHeightY[],
562 int SwathHeightC[],
563 double NetDCCRateLuma[],
564 double NetDCCRateChroma[],
565 double DCCFractionOfZeroSizeRequestsLuma[],
566 double DCCFractionOfZeroSizeRequestsChroma[],
567 int HTotal[],
568 int VTotal[],
569 double PixelClock[],
570 double VRatio[],
571 enum scan_direction_class SourceScan[],
572 int BlockHeight256BytesY[],
573 int BlockWidth256BytesY[],
574 int BlockHeight256BytesC[],
575 int BlockWidth256BytesC[],
576 int DCCYMaxUncompressedBlock[],
577 int DCCCMaxUncompressedBlock[],
578 int VActive[],
579 bool DCCEnable[],
580 bool WritebackEnable[],
581 double ReadBandwidthPlaneLuma[],
582 double ReadBandwidthPlaneChroma[],
583 double meta_row_bw[],
584 double dpte_row_bw[],
585 double *StutterEfficiencyNotIncludingVBlank,
586 double *StutterEfficiency,
587 int *NumberOfStutterBurstsPerFrame,
588 double *Z8StutterEfficiencyNotIncludingVBlank,
589 double *Z8StutterEfficiency,
590 int *Z8NumberOfStutterBurstsPerFrame,
591 double *StutterPeriod);
592
593 static void CalculateSwathAndDETConfiguration(
594 bool ForceSingleDPP,
595 int NumberOfActivePlanes,
596 unsigned int DETBufferSizeInKByte,
597 double MaximumSwathWidthLuma[],
598 double MaximumSwathWidthChroma[],
599 enum scan_direction_class SourceScan[],
600 enum source_format_class SourcePixelFormat[],
601 enum dm_swizzle_mode SurfaceTiling[],
602 int ViewportWidth[],
603 int ViewportHeight[],
604 int SurfaceWidthY[],
605 int SurfaceWidthC[],
606 int SurfaceHeightY[],
607 int SurfaceHeightC[],
608 int Read256BytesBlockHeightY[],
609 int Read256BytesBlockHeightC[],
610 int Read256BytesBlockWidthY[],
611 int Read256BytesBlockWidthC[],
612 enum odm_combine_mode ODMCombineEnabled[],
613 int BlendingAndTiming[],
614 int BytePerPixY[],
615 int BytePerPixC[],
616 double BytePerPixDETY[],
617 double BytePerPixDETC[],
618 int HActive[],
619 double HRatio[],
620 double HRatioChroma[],
621 int DPPPerPlane[],
622 int swath_width_luma_ub[],
623 int swath_width_chroma_ub[],
624 double SwathWidth[],
625 double SwathWidthChroma[],
626 int SwathHeightY[],
627 int SwathHeightC[],
628 unsigned int DETBufferSizeY[],
629 unsigned int DETBufferSizeC[],
630 bool ViewportSizeSupportPerPlane[],
631 bool *ViewportSizeSupport);
632 static void CalculateSwathWidth(
633 bool ForceSingleDPP,
634 int NumberOfActivePlanes,
635 enum source_format_class SourcePixelFormat[],
636 enum scan_direction_class SourceScan[],
637 int ViewportWidth[],
638 int ViewportHeight[],
639 int SurfaceWidthY[],
640 int SurfaceWidthC[],
641 int SurfaceHeightY[],
642 int SurfaceHeightC[],
643 enum odm_combine_mode ODMCombineEnabled[],
644 int BytePerPixY[],
645 int BytePerPixC[],
646 int Read256BytesBlockHeightY[],
647 int Read256BytesBlockHeightC[],
648 int Read256BytesBlockWidthY[],
649 int Read256BytesBlockWidthC[],
650 int BlendingAndTiming[],
651 int HActive[],
652 double HRatio[],
653 int DPPPerPlane[],
654 double SwathWidthSingleDPPY[],
655 double SwathWidthSingleDPPC[],
656 double SwathWidthY[],
657 double SwathWidthC[],
658 int MaximumSwathHeightY[],
659 int MaximumSwathHeightC[],
660 int swath_width_luma_ub[],
661 int swath_width_chroma_ub[]);
662
663 static double CalculateExtraLatency(
664 int RoundTripPingLatencyCycles,
665 int ReorderingBytes,
666 double DCFCLK,
667 int TotalNumberOfActiveDPP,
668 int PixelChunkSizeInKByte,
669 int TotalNumberOfDCCActiveDPP,
670 int MetaChunkSize,
671 double ReturnBW,
672 bool GPUVMEnable,
673 bool HostVMEnable,
674 int NumberOfActivePlanes,
675 int NumberOfDPP[],
676 int dpte_group_bytes[],
677 double HostVMInefficiencyFactor,
678 double HostVMMinPageSize,
679 int HostVMMaxNonCachedPageTableLevels);
680
681 static double CalculateExtraLatencyBytes(
682 int ReorderingBytes,
683 int TotalNumberOfActiveDPP,
684 int PixelChunkSizeInKByte,
685 int TotalNumberOfDCCActiveDPP,
686 int MetaChunkSize,
687 bool GPUVMEnable,
688 bool HostVMEnable,
689 int NumberOfActivePlanes,
690 int NumberOfDPP[],
691 int dpte_group_bytes[],
692 double HostVMInefficiencyFactor,
693 double HostVMMinPageSize,
694 int HostVMMaxNonCachedPageTableLevels);
695
696 static double CalculateUrgentLatency(
697 double UrgentLatencyPixelDataOnly,
698 double UrgentLatencyPixelMixedWithVMData,
699 double UrgentLatencyVMDataOnly,
700 bool DoUrgentLatencyAdjustment,
701 double UrgentLatencyAdjustmentFabricClockComponent,
702 double UrgentLatencyAdjustmentFabricClockReference,
703 double FabricClockSingle);
704
705 static void CalculateUnboundedRequestAndCompressedBufferSize(
706 unsigned int DETBufferSizeInKByte,
707 int ConfigReturnBufferSizeInKByte,
708 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
709 int TotalActiveDPP,
710 bool NoChromaPlanes,
711 int MaxNumDPP,
712 int CompressedBufferSegmentSizeInkByteFinal,
713 enum output_encoder_class *Output,
714 bool *UnboundedRequestEnabled,
715 int *CompressedBufferSizeInkByte);
716
717 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
718
/*
 * dml31_recalculate - run the full dml31 calculation sequence on mode_lib.
 *
 * The same mode_lib pointer is handed, in this fixed order, to:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * When __DML_VBA_DEBUG__ is defined a trace line is printed right before
 * step 4. Nothing is returned; all results are left in whatever state the
 * called functions write.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	/* Steps 1-3: the order of these calls is kept exactly as given. */
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	/* Step 4: clocks, prefetch parameters, watermarks and performance. */
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
729
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)730 static unsigned int dscceComputeDelay(
731 unsigned int bpc,
732 double BPP,
733 unsigned int sliceWidth,
734 unsigned int numSlices,
735 enum output_format_class pixelFormat,
736 enum output_encoder_class Output)
737 {
738 // valid bpc = source bits per component in the set of {8, 10, 12}
739 // valid bpp = increments of 1/16 of a bit
740 // min = 6/7/8 in N420/N422/444, respectively
741 // max = such that compression is 1:1
742 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
743 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
744 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
745
746 // fixed value
747 unsigned int rcModelSize = 8192;
748
749 // N422/N420 operate at 2 pixels per clock
750 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
751
752 if (pixelFormat == dm_420)
753 pixelsPerClock = 2;
754 else if (pixelFormat == dm_444)
755 pixelsPerClock = 1;
756 else if (pixelFormat == dm_n422)
757 pixelsPerClock = 2;
758 // #all other modes operate at 1 pixel per clock
759 else
760 pixelsPerClock = 1;
761
762 //initial transmit delay as per PPS
763 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
764
765 //compute ssm delay
766 if (bpc == 8)
767 D = 81;
768 else if (bpc == 10)
769 D = 89;
770 else
771 D = 113;
772
773 //divide by pixel per cycle to compute slice width as seen by DSC
774 w = sliceWidth / pixelsPerClock;
775
776 //422 mode has an additional cycle of delay
777 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
778 s = 0;
779 else
780 s = 1;
781
782 //main calculation for the dscce
783 ix = initalXmitDelay + 45;
784 wx = (w + 2) / 3;
785 P = 3 * wx - w;
786 l0 = ix / w;
787 a = ix + P * l0;
788 ax = (a + 2) / 3 + D + 6 + 1;
789 L = (ax + wx - 1) / wx;
790 if ((ix % w) == 0 && P != 0)
791 lstall = 1;
792 else
793 lstall = 0;
794 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
795
796 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
797 pixels = Delay * 3 * pixelsPerClock;
798 return pixels;
799 }
800
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)801 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
802 {
803 unsigned int Delay = 0;
804
805 if (pixelFormat == dm_420) {
806 // sfr
807 Delay = Delay + 2;
808 // dsccif
809 Delay = Delay + 0;
810 // dscc - input deserializer
811 Delay = Delay + 3;
812 // dscc gets pixels every other cycle
813 Delay = Delay + 2;
814 // dscc - input cdc fifo
815 Delay = Delay + 12;
816 // dscc gets pixels every other cycle
817 Delay = Delay + 13;
818 // dscc - cdc uncertainty
819 Delay = Delay + 2;
820 // dscc - output cdc fifo
821 Delay = Delay + 7;
822 // dscc gets pixels every other cycle
823 Delay = Delay + 3;
824 // dscc - cdc uncertainty
825 Delay = Delay + 2;
826 // dscc - output serializer
827 Delay = Delay + 1;
828 // sft
829 Delay = Delay + 1;
830 } else if (pixelFormat == dm_n422) {
831 // sfr
832 Delay = Delay + 2;
833 // dsccif
834 Delay = Delay + 1;
835 // dscc - input deserializer
836 Delay = Delay + 5;
837 // dscc - input cdc fifo
838 Delay = Delay + 25;
839 // dscc - cdc uncertainty
840 Delay = Delay + 2;
841 // dscc - output cdc fifo
842 Delay = Delay + 10;
843 // dscc - cdc uncertainty
844 Delay = Delay + 2;
845 // dscc - output serializer
846 Delay = Delay + 1;
847 // sft
848 Delay = Delay + 1;
849 } else {
850 // sfr
851 Delay = Delay + 2;
852 // dsccif
853 Delay = Delay + 0;
854 // dscc - input deserializer
855 Delay = Delay + 3;
856 // dscc - input cdc fifo
857 Delay = Delay + 12;
858 // dscc - cdc uncertainty
859 Delay = Delay + 2;
860 // dscc - output cdc fifo
861 Delay = Delay + 7;
862 // dscc - output serializer
863 Delay = Delay + 1;
864 // dscc - cdc uncertainty
865 Delay = Delay + 2;
866 // sft
867 Delay = Delay + 1;
868 }
869
870 return Delay;
871 }
872
/*
 * CalculatePrefetchSchedule() - compute the prefetch schedule of one pipe for
 * a given VStartup.
 *
 * Steps, in order:
 *  1. Vupdate and dynamic metadata timing is obtained from
 *     CalculateVupdateAndDynamicMetadataParameters(); the dynamic metadata
 *     latency (*Tdmdl, *Tdmdl_vm) is derived and checked against the time
 *     available in VStartup lines (*NotEnoughTimeForDynamicMetadata).
 *  2. The pipeline delay after the scaler is computed and split into whole
 *     lines (*DSTYAfterScaler) and remaining pixels (*DSTXAfterScaler).
 *  3. Two candidate schedules are built:
 *     - "oto": based on prefetch_bw_oto, the larger of
 *       bytes_pp * PixelClock / DPPPerPlane and the bandwidth needed to fetch
 *       the prefetch swath bytes within max(PrefetchSourceLinesY/C) lines;
 *     - "equ": based on the lines left in VStartup once setup time,
 *       max(TWait + TCalc, *Tdmdl) and the after-scaler delay are removed.
 *     Whichever needs fewer destination lines is reported.
 *  4. From the chosen schedule the prefetch vertical ratios, the required
 *     luma/chroma prefetch bandwidth and *prefetch_vmrow_bw are derived.
 *
 * Line counts are rounded to quarter lines with dml_ceil()/dml_floor()
 * (assumed here to round to a multiple of their second argument).
 *
 * Several parameters (mode_lib, HostVMMinPageSize, SwathWidthY, SwathWidthC)
 * are not referenced by name in this body; mode_lib may still be needed by
 * the dml_print() macro -- TODO confirm before removing anything.
 *
 * Return: true on error, false on success. If DPPCLK or DISPCLK is zero the
 * function returns true before any prefetch output is written. On any later
 * error (MyError) the bandwidth, line count and ratio outputs are zeroed.
 */
static bool CalculatePrefetchSchedule(
		struct display_mode_lib *mode_lib,
		double HostVMInefficiencyFactor,
		Pipe *myPipe,
		unsigned int DSCDelay,
		double DPPCLKDelaySubtotalPlusCNVCFormater,
		double DPPCLKDelaySCL,
		double DPPCLKDelaySCLLBOnly,
		double DPPCLKDelayCNVCCursor,
		double DISPCLKDelaySubtotal,
		unsigned int DPP_RECOUT_WIDTH,
		enum output_format_class OutputFormat,
		unsigned int MaxInterDCNTileRepeaters,
		unsigned int VStartup,
		unsigned int MaxVStartup,
		unsigned int GPUVMPageTableLevels,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		double HostVMMinPageSize,
		bool DynamicMetadataEnable,
		bool DynamicMetadataVMEnabled,
		int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int DynamicMetadataTransmittedBytes,
		double UrgentLatency,
		double UrgentExtraLatency,
		double TCalc,
		unsigned int PDEAndMetaPTEBytesFrame,
		unsigned int MetaRowByte,
		unsigned int PixelPTEBytesPerRow,
		double PrefetchSourceLinesY,
		unsigned int SwathWidthY,
		double VInitPreFillY,
		unsigned int MaxNumSwathY,
		double PrefetchSourceLinesC,
		unsigned int SwathWidthC,
		double VInitPreFillC,
		unsigned int MaxNumSwathC,
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double TWait,
		double *DSTXAfterScaler,
		double *DSTYAfterScaler,
		double *DestinationLinesForPrefetch,
		double *PrefetchBandwidth,
		double *DestinationLinesToRequestVMInVBlank,
		double *DestinationLinesToRequestRowInVBlank,
		double *VRatioPrefetchY,
		double *VRatioPrefetchC,
		double *RequiredPrefetchPixDataBWLuma,
		double *RequiredPrefetchPixDataBWChroma,
		bool *NotEnoughTimeForDynamicMetadata,
		double *Tno_bw,
		double *prefetch_vmrow_bw,
		double *Tdmdl_vm,
		double *Tdmdl,
		double *TSetup,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	bool MyError = false;
	unsigned int DPPCycles, DISPCLKCycles;
	double DSTTotalPixelsAfterScaler;
	double LineTime;
	double dst_y_prefetch_equ;
	double Tsw_oto;
	double prefetch_bw_oto;
	double Tvm_oto;
	double Tr0_oto;
	double Tvm_oto_lines;
	double Tr0_oto_lines;
	double dst_y_prefetch_oto;
	double TimeForFetchingMetaPTE = 0;
	double TimeForFetchingRowInVBlank = 0;
	double LinesToRequestPrefetchPixelData = 0;
	unsigned int HostVMDynamicLevelsTrips;
	double trip_to_mem;
	double Tvm_trips;
	double Tr0_trips;
	double Tvm_trips_rounded;
	double Tr0_trips_rounded;
	double Lsw_oto;
	double Tpre_rounded;
	double prefetch_bw_equ;
	double Tvm_equ;
	double Tr0_equ;
	double Tdmbf;
	double Tdmec;
	double Tdmsks;
	double prefetch_sw_bytes;
	double bytes_pp;
	double dep_bytes;
	int max_vratio_pre = 4;
	double min_Lsw;
	double Tsw_est1 = 0;
	double Tsw_est3 = 0;

	/* Host VM levels only add trips when both GPUVM and HostVM are enabled. */
	if (GPUVMEnable == true && HostVMEnable == true) {
		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
	} else {
		HostVMDynamicLevelsTrips = 0;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
#endif
	CalculateVupdateAndDynamicMetadataParameters(
			MaxInterDCNTileRepeaters,
			myPipe->DPPCLK,
			myPipe->DISPCLK,
			myPipe->DCFCLKDeepSleep,
			myPipe->PixelClock,
			myPipe->HTotal,
			myPipe->VBlank,
			DynamicMetadataTransmittedBytes,
			DynamicMetadataLinesBeforeActiveRequired,
			myPipe->InterlaceEnable,
			myPipe->ProgressiveToInterlaceUnitInOPP,
			TSetup,
			&Tdmbf,
			&Tdmec,
			&Tdmsks,
			VUpdateOffsetPix,
			VUpdateWidthPix,
			VReadyOffsetPix);

	/* Duration of one line; every "lines <-> time" conversion below uses it. */
	LineTime = myPipe->HTotal / myPipe->PixelClock;
	trip_to_mem = UrgentLatency;
	Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);

#ifdef __DML_VBA_ALLOW_DELTA__
	if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
#else
	if (DynamicMetadataVMEnabled == true) {
#endif
		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
	} else {
		*Tdmdl = TWait + UrgentExtraLatency;
	}

#ifdef __DML_VBA_ALLOW_DELTA__
	if (DynamicMetadataEnable == false) {
		*Tdmdl = 0.0;
	}
#endif

	/* Dynamic metadata must fit in the time spanned by VStartup lines. */
	if (DynamicMetadataEnable == true) {
		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
			*NotEnoughTimeForDynamicMetadata = true;
			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
		} else {
			*NotEnoughTimeForDynamicMetadata = false;
		}
	} else {
		*NotEnoughTimeForDynamicMetadata = false;
	}

	*Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);

	/* DPP delay: scaler vs. line-buffer-only path, plus a per-cursor delay. */
	if (myPipe->ScalerEnabled)
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
	else
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;

	DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;

	DISPCLKCycles = DISPCLKDelaySubtotal;

	/* Both clocks are divisors below; a zero clock is reported as an error. */
	if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
		return true;

	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
	dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
	dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
	dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
	/* *DSTXAfterScaler is a double, so it needs a floating-point conversion. */
	dml_print("DML::%s: DSTXAfterScaler: %f\n", __func__, *DSTXAfterScaler);
	dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
#endif

	*DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;

	if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
		*DSTYAfterScaler = 1;
	else
		*DSTYAfterScaler = 0;

	/* Normalise the delay into whole lines (DSTY) plus remaining pixels (DSTX). */
	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DSTXAfterScaler: %f (final)\n", __func__, *DSTXAfterScaler);
#endif

	MyError = false;

	/* Trip latencies, also expressed rounded up to quarter lines. */
	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
	Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
	Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;

#ifdef __DML_VBA_ALLOW_DELTA__
	if (!myPipe->DCCEnable) {
		Tr0_trips = 0.0;
		Tr0_trips_rounded = 0.0;
	}
#endif

	if (!GPUVMEnable) {
		Tvm_trips = 0.0;
		Tvm_trips_rounded = 0.0;
	}

	if (GPUVMEnable) {
		if (GPUVMPageTableLevels >= 3) {
			*Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
		} else {
			*Tno_bw = 0;
		}
	} else if (!myPipe->DCCEnable) {
		*Tno_bw = LineTime;
	} else {
		*Tno_bw = LineTime / 4;
	}

	/*
	 * NOTE(review): if BytePerPixelC is an integer type, BytePerPixelC / 4
	 * truncates before the sum is converted to double -- confirm this is
	 * the intended behaviour for the 4:2:0 formats.
	 */
	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
	else
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;

	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));

	/* Swath fetch may not be shorter than source lines / max_vratio_pre. */
	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
	Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
	Tsw_oto = Lsw_oto * LineTime;

	/* Re-derive the bandwidth from the quarter-line-rounded swath time. */
	prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML: HTotal: %d\n", myPipe->HTotal);
	dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
	dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
	dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
	dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
	dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
#endif

	if (GPUVMEnable == true)
		Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
	else
		Tvm_oto = LineTime / 4.0;

	/*
	 * NOTE(review): the third term here is LineTime - Tvm_oto, whereas the
	 * matching "equ" computation further down uses (LineTime - Tvm_equ) / 2.
	 * Left unchanged; confirm with the HW formula which one is intended.
	 */
	if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
		Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
				LineTime - Tvm_oto,
				LineTime / 4);
	} else {
		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
#endif

	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
	/* Lines left for prefetch once setup, wait/calc and output delay are removed. */
	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
	Tpre_rounded = dst_y_prefetch_equ * LineTime;

	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);

	/* Swath bytes used by the "equ" bandwidths are raised to 2 * dep_bytes when smaller than dep_bytes. */
	if (prefetch_sw_bytes < dep_bytes)
		prefetch_sw_bytes = 2 * dep_bytes;

	dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
	dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
	dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
	dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
	dml_print("DML: LineTime: %f\n", LineTime);
	dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);

	dml_print("DML: LineTime: %f\n", LineTime);
	dml_print("DML: VStartup: %d\n", VStartup);
	dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
	dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
	dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
	dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
	dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
	dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
	dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
	dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
	dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
	dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
	dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);

	*PrefetchBandwidth = 0;
	*DestinationLinesToRequestVMInVBlank = 0;
	*DestinationLinesToRequestRowInVBlank = 0;
	*VRatioPrefetchY = 0;
	*VRatioPrefetchC = 0;
	*RequiredPrefetchPixDataBWLuma = 0;
	if (dst_y_prefetch_equ > 1) {
		double PrefetchBandwidth1;
		double PrefetchBandwidth2;
		double PrefetchBandwidth3;
		double PrefetchBandwidth4;

		/*
		 * Four candidate bandwidths, differing in whether the VM fetch
		 * and the row fetches are counted as data (bytes in the
		 * numerator) or as fixed trip latency (time removed from the
		 * denominator).
		 */
		if (Tpre_rounded - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
		} else {
			PrefetchBandwidth1 = 0;
		}

		if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
		}

		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth2 = 0;

		if (Tpre_rounded - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
		} else {
			PrefetchBandwidth3 = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
#endif
		if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
		}

		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth4 = 0;

		{
			bool Case1OK;
			bool Case2OK;
			bool Case3OK;

			/*
			 * A candidate is usable only if the assumption it was
			 * built on holds: at that bandwidth the VM / row fetch
			 * time is at least (>=) or below (<) the rounded trip
			 * latency, matching how the candidate accounted for it.
			 */
			if (PrefetchBandwidth1 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
					Case1OK = true;
				} else {
					Case1OK = false;
				}
			} else {
				Case1OK = false;
			}

			if (PrefetchBandwidth2 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
					Case2OK = true;
				} else {
					Case2OK = false;
				}
			} else {
				Case2OK = false;
			}

			if (PrefetchBandwidth3 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
					Case3OK = true;
				} else {
					Case3OK = false;
				}
			} else {
				Case3OK = false;
			}

			/* First usable case wins; candidate 4 is the fallback. */
			if (Case1OK) {
				prefetch_bw_equ = PrefetchBandwidth1;
			} else if (Case2OK) {
				prefetch_bw_equ = PrefetchBandwidth2;
			} else if (Case3OK) {
				prefetch_bw_equ = PrefetchBandwidth3;
			} else {
				prefetch_bw_equ = PrefetchBandwidth4;
			}

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif

			if (prefetch_bw_equ > 0) {
				if (GPUVMEnable == true) {
					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
				} else {
					Tvm_equ = LineTime / 4;
				}

				if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
					Tr0_equ = dml_max4(
							(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
							Tr0_trips,
							(LineTime - Tvm_equ) / 2,
							LineTime / 4);
				} else {
					Tr0_equ = (LineTime - Tvm_equ) / 2;
				}
			} else {
				Tvm_equ = 0;
				Tr0_equ = 0;
				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
			}
		}

		/* Report whichever schedule needs fewer destination lines. */
		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
			*DestinationLinesForPrefetch = dst_y_prefetch_oto;
			TimeForFetchingMetaPTE = Tvm_oto;
			TimeForFetchingRowInVBlank = Tr0_oto;
			*PrefetchBandwidth = prefetch_bw_oto;
		} else {
			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			TimeForFetchingMetaPTE = Tvm_equ;
			TimeForFetchingRowInVBlank = Tr0_equ;
			*PrefetchBandwidth = prefetch_bw_equ;
		}

		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;

		*DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;

#ifdef __DML_VBA_ALLOW_DELTA__
		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
		// See note above dated 5/30/2018
		//                      - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
				- ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
#else
		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
#endif

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif

		if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {

			/* Prefetch vertical ratios are never reported below 1.0. */
			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
			dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
#endif
			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
					*VRatioPrefetchY = dml_max(
							(double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
							(double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
				} else {
					MyError = true;
					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
					*VRatioPrefetchY = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
#endif
			}

			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
			dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
#endif
			if ((SwathHeightC > 4)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
					*VRatioPrefetchC = dml_max(
							*VRatioPrefetchC,
							(double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
				} else {
					MyError = true;
					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
					*VRatioPrefetchC = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
#endif
			}

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif

			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
#endif

			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
					/ LineTime;
		} else {
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
			dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
			*VRatioPrefetchY = 0;
			*VRatioPrefetchC = 0;
			*RequiredPrefetchPixDataBWLuma = 0;
			*RequiredPrefetchPixDataBWChroma = 0;
		}

		dml_print(
				"DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
				(double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
		dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
		dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
		dml_print(
				"DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
				(double) LinesToRequestPrefetchPixelData * LineTime);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
				(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
				(double) myPipe->HTotal)) * LineTime);
		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
				VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
				- (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);

	} else {
		/* One line or less is available for prefetch: no schedule possible. */
		MyError = true;
		dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
	}

	{
		double prefetch_vm_bw;
		double prefetch_row_bw;

		/* Bytes to fetch with zero lines allotted to them is an error. */
		if (PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
		} else {
			prefetch_vm_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}

		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
		} else {
			prefetch_row_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}

		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On failure, zero the schedule outputs so no partial result is reported. */
	if (MyError) {
		*PrefetchBandwidth = 0;
		TimeForFetchingMetaPTE = 0;
		TimeForFetchingRowInVBlank = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		LinesToRequestPrefetchPixelData = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
}
1513
/*
 * Snap Clock onto the set of frequencies 4 * VCOSpeed / N, picking the
 * divider N with dml_floor(). Assuming dml_floor(x, 1) rounds down to an
 * integer and the inputs are positive, the result is not below Clock.
 * No guard is applied for a divider of zero (Clock > 4 * VCOSpeed).
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double vco_x4 = VCOSpeed * 4;
	double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
1518
/*
 * Snap Clock onto the set of frequencies 4 * VCOSpeed / N, picking the
 * divider N with dml_ceil(). Assuming dml_ceil(x, 1) rounds up to an
 * integer and the inputs are positive, the result is not above Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double vco_x4 = VCOSpeed * 4;
	double divider = dml_ceil(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
1523
1524 static void CalculateDCCConfiguration(
1525 bool DCCEnabled,
1526 bool DCCProgrammingAssumesScanDirectionUnknown,
1527 enum source_format_class SourcePixelFormat,
1528 unsigned int SurfaceWidthLuma,
1529 unsigned int SurfaceWidthChroma,
1530 unsigned int SurfaceHeightLuma,
1531 unsigned int SurfaceHeightChroma,
1532 double DETBufferSize,
1533 unsigned int RequestHeight256ByteLuma,
1534 unsigned int RequestHeight256ByteChroma,
1535 enum dm_swizzle_mode TilingFormat,
1536 unsigned int BytePerPixelY,
1537 unsigned int BytePerPixelC,
1538 double BytePerPixelDETY,
1539 double BytePerPixelDETC,
1540 enum scan_direction_class ScanOrientation,
1541 unsigned int *MaxUncompressedBlockLuma,
1542 unsigned int *MaxUncompressedBlockChroma,
1543 unsigned int *MaxCompressedBlockLuma,
1544 unsigned int *MaxCompressedBlockChroma,
1545 unsigned int *IndependentBlockLuma,
1546 unsigned int *IndependentBlockChroma)
1547 {
1548 int yuv420;
1549 int horz_div_l;
1550 int horz_div_c;
1551 int vert_div_l;
1552 int vert_div_c;
1553
1554 int swath_buf_size;
1555 double detile_buf_vp_horz_limit;
1556 double detile_buf_vp_vert_limit;
1557
1558 int MAS_vp_horz_limit;
1559 int MAS_vp_vert_limit;
1560 int max_vp_horz_width;
1561 int max_vp_vert_height;
1562 int eff_surf_width_l;
1563 int eff_surf_width_c;
1564 int eff_surf_height_l;
1565 int eff_surf_height_c;
1566
1567 int full_swath_bytes_horz_wc_l;
1568 int full_swath_bytes_horz_wc_c;
1569 int full_swath_bytes_vert_wc_l;
1570 int full_swath_bytes_vert_wc_c;
1571 int req128_horz_wc_l;
1572 int req128_horz_wc_c;
1573 int req128_vert_wc_l;
1574 int req128_vert_wc_c;
1575 int segment_order_horz_contiguous_luma;
1576 int segment_order_horz_contiguous_chroma;
1577 int segment_order_vert_contiguous_luma;
1578 int segment_order_vert_contiguous_chroma;
1579
1580 typedef enum {
1581 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1582 } RequestType;
1583 RequestType RequestLuma;
1584 RequestType RequestChroma;
1585
1586 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1587 horz_div_l = 1;
1588 horz_div_c = 1;
1589 vert_div_l = 1;
1590 vert_div_c = 1;
1591
1592 if (BytePerPixelY == 1)
1593 vert_div_l = 0;
1594 if (BytePerPixelC == 1)
1595 vert_div_c = 0;
1596 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1597 horz_div_l = 0;
1598 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1599 horz_div_c = 0;
1600
1601 if (BytePerPixelC == 0) {
1602 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1603 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1604 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1605 } else {
1606 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1607 detile_buf_vp_horz_limit = (double) swath_buf_size
1608 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1609 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1610 detile_buf_vp_vert_limit = (double) swath_buf_size
1611 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1612 }
1613
1614 if (SourcePixelFormat == dm_420_10) {
1615 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1616 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1617 }
1618
1619 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1620 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1621
1622 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1623 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1624 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1625 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1626 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1627 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1628 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1629 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1630
1631 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1632 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1633 if (BytePerPixelC > 0) {
1634 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1635 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1636 } else {
1637 full_swath_bytes_horz_wc_c = 0;
1638 full_swath_bytes_vert_wc_c = 0;
1639 }
1640
1641 if (SourcePixelFormat == dm_420_10) {
1642 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1643 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1644 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1645 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1646 }
1647
1648 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1649 req128_horz_wc_l = 0;
1650 req128_horz_wc_c = 0;
1651 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1652 req128_horz_wc_l = 0;
1653 req128_horz_wc_c = 1;
1654 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1655 req128_horz_wc_l = 1;
1656 req128_horz_wc_c = 0;
1657 } else {
1658 req128_horz_wc_l = 1;
1659 req128_horz_wc_c = 1;
1660 }
1661
1662 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1663 req128_vert_wc_l = 0;
1664 req128_vert_wc_c = 0;
1665 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1666 req128_vert_wc_l = 0;
1667 req128_vert_wc_c = 1;
1668 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1669 req128_vert_wc_l = 1;
1670 req128_vert_wc_c = 0;
1671 } else {
1672 req128_vert_wc_l = 1;
1673 req128_vert_wc_c = 1;
1674 }
1675
1676 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1677 segment_order_horz_contiguous_luma = 0;
1678 } else {
1679 segment_order_horz_contiguous_luma = 1;
1680 }
1681 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1682 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1683 segment_order_vert_contiguous_luma = 0;
1684 } else {
1685 segment_order_vert_contiguous_luma = 1;
1686 }
1687 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1688 segment_order_horz_contiguous_chroma = 0;
1689 } else {
1690 segment_order_horz_contiguous_chroma = 1;
1691 }
1692 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1693 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1694 segment_order_vert_contiguous_chroma = 0;
1695 } else {
1696 segment_order_vert_contiguous_chroma = 1;
1697 }
1698
1699 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1700 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1701 RequestLuma = REQ_256Bytes;
1702 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1703 RequestLuma = REQ_128BytesNonContiguous;
1704 } else {
1705 RequestLuma = REQ_128BytesContiguous;
1706 }
1707 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1708 RequestChroma = REQ_256Bytes;
1709 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1710 RequestChroma = REQ_128BytesNonContiguous;
1711 } else {
1712 RequestChroma = REQ_128BytesContiguous;
1713 }
1714 } else if (ScanOrientation != dm_vert) {
1715 if (req128_horz_wc_l == 0) {
1716 RequestLuma = REQ_256Bytes;
1717 } else if (segment_order_horz_contiguous_luma == 0) {
1718 RequestLuma = REQ_128BytesNonContiguous;
1719 } else {
1720 RequestLuma = REQ_128BytesContiguous;
1721 }
1722 if (req128_horz_wc_c == 0) {
1723 RequestChroma = REQ_256Bytes;
1724 } else if (segment_order_horz_contiguous_chroma == 0) {
1725 RequestChroma = REQ_128BytesNonContiguous;
1726 } else {
1727 RequestChroma = REQ_128BytesContiguous;
1728 }
1729 } else {
1730 if (req128_vert_wc_l == 0) {
1731 RequestLuma = REQ_256Bytes;
1732 } else if (segment_order_vert_contiguous_luma == 0) {
1733 RequestLuma = REQ_128BytesNonContiguous;
1734 } else {
1735 RequestLuma = REQ_128BytesContiguous;
1736 }
1737 if (req128_vert_wc_c == 0) {
1738 RequestChroma = REQ_256Bytes;
1739 } else if (segment_order_vert_contiguous_chroma == 0) {
1740 RequestChroma = REQ_128BytesNonContiguous;
1741 } else {
1742 RequestChroma = REQ_128BytesContiguous;
1743 }
1744 }
1745
1746 if (RequestLuma == REQ_256Bytes) {
1747 *MaxUncompressedBlockLuma = 256;
1748 *MaxCompressedBlockLuma = 256;
1749 *IndependentBlockLuma = 0;
1750 } else if (RequestLuma == REQ_128BytesContiguous) {
1751 *MaxUncompressedBlockLuma = 256;
1752 *MaxCompressedBlockLuma = 128;
1753 *IndependentBlockLuma = 128;
1754 } else {
1755 *MaxUncompressedBlockLuma = 256;
1756 *MaxCompressedBlockLuma = 64;
1757 *IndependentBlockLuma = 64;
1758 }
1759
1760 if (RequestChroma == REQ_256Bytes) {
1761 *MaxUncompressedBlockChroma = 256;
1762 *MaxCompressedBlockChroma = 256;
1763 *IndependentBlockChroma = 0;
1764 } else if (RequestChroma == REQ_128BytesContiguous) {
1765 *MaxUncompressedBlockChroma = 256;
1766 *MaxCompressedBlockChroma = 128;
1767 *IndependentBlockChroma = 128;
1768 } else {
1769 *MaxUncompressedBlockChroma = 256;
1770 *MaxCompressedBlockChroma = 64;
1771 *IndependentBlockChroma = 64;
1772 }
1773
1774 if (DCCEnabled != true || BytePerPixelC == 0) {
1775 *MaxUncompressedBlockChroma = 0;
1776 *MaxCompressedBlockChroma = 0;
1777 *IndependentBlockChroma = 0;
1778 }
1779
1780 if (DCCEnabled != true) {
1781 *MaxUncompressedBlockLuma = 0;
1782 *MaxCompressedBlockLuma = 0;
1783 *IndependentBlockLuma = 0;
1784 }
1785 }
1786
1787 static double CalculatePrefetchSourceLines(
1788 struct display_mode_lib *mode_lib,
1789 double VRatio,
1790 double vtaps,
1791 bool Interlace,
1792 bool ProgressiveToInterlaceUnitInOPP,
1793 unsigned int SwathHeight,
1794 unsigned int ViewportYStart,
1795 double *VInitPreFill,
1796 unsigned int *MaxNumSwath)
1797 {
1798 struct vba_vars_st *v = &mode_lib->vba;
1799 unsigned int MaxPartialSwath;
1800
1801 if (ProgressiveToInterlaceUnitInOPP)
1802 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1803 else
1804 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1805
1806 if (!v->IgnoreViewportPositioning) {
1807
1808 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1809
1810 if (*VInitPreFill > 1.0)
1811 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1812 else
1813 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1814 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1815
1816 } else {
1817
1818 if (ViewportYStart != 0)
1819 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1820
1821 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1822
1823 if (*VInitPreFill > 1.0)
1824 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1825 else
1826 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1827 }
1828
1829 #ifdef __DML_VBA_DEBUG__
1830 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1831 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1832 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1833 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1834 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1835 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1836 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1837 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1838 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1839 #endif
1840 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1841 }
1842
1843 static unsigned int CalculateVMAndRowBytes(
1844 struct display_mode_lib *mode_lib,
1845 bool DCCEnable,
1846 unsigned int BlockHeight256Bytes,
1847 unsigned int BlockWidth256Bytes,
1848 enum source_format_class SourcePixelFormat,
1849 unsigned int SurfaceTiling,
1850 unsigned int BytePerPixel,
1851 enum scan_direction_class ScanDirection,
1852 unsigned int SwathWidth,
1853 unsigned int ViewportHeight,
1854 bool GPUVMEnable,
1855 bool HostVMEnable,
1856 unsigned int HostVMMaxNonCachedPageTableLevels,
1857 unsigned int GPUVMMinPageSize,
1858 unsigned int HostVMMinPageSize,
1859 unsigned int PTEBufferSizeInRequests,
1860 unsigned int Pitch,
1861 unsigned int DCCMetaPitch,
1862 unsigned int *MacroTileWidth,
1863 unsigned int *MetaRowByte,
1864 unsigned int *PixelPTEBytesPerRow,
1865 bool *PTEBufferSizeNotExceeded,
1866 int *dpte_row_width_ub,
1867 unsigned int *dpte_row_height,
1868 unsigned int *MetaRequestWidth,
1869 unsigned int *MetaRequestHeight,
1870 unsigned int *meta_row_width,
1871 unsigned int *meta_row_height,
1872 int *vm_group_bytes,
1873 unsigned int *dpte_group_bytes,
1874 unsigned int *PixelPTEReqWidth,
1875 unsigned int *PixelPTEReqHeight,
1876 unsigned int *PTERequestSize,
1877 int *DPDE0BytesFrame,
1878 int *MetaPTEBytesFrame)
1879 {
1880 struct vba_vars_st *v = &mode_lib->vba;
1881 unsigned int MPDEBytesFrame;
1882 unsigned int DCCMetaSurfaceBytes;
1883 unsigned int MacroTileSizeBytes;
1884 unsigned int MacroTileHeight;
1885 unsigned int ExtraDPDEBytesFrame;
1886 unsigned int PDEAndMetaPTEBytesFrame;
1887 unsigned int PixelPTEReqHeightPTEs = 0;
1888 unsigned int HostVMDynamicLevels = 0;
1889 double FractionOfPTEReturnDrop;
1890
1891 if (GPUVMEnable == true && HostVMEnable == true) {
1892 if (HostVMMinPageSize < 2048) {
1893 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1894 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1895 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1896 } else {
1897 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1898 }
1899 }
1900
1901 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1902 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1903 if (ScanDirection != dm_vert) {
1904 *meta_row_height = *MetaRequestHeight;
1905 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1906 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1907 } else {
1908 *meta_row_height = *MetaRequestWidth;
1909 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1910 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1911 }
1912 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1913 if (GPUVMEnable == true) {
1914 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1915 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1916 } else {
1917 *MetaPTEBytesFrame = 0;
1918 MPDEBytesFrame = 0;
1919 }
1920
1921 if (DCCEnable != true) {
1922 *MetaPTEBytesFrame = 0;
1923 MPDEBytesFrame = 0;
1924 *MetaRowByte = 0;
1925 }
1926
1927 if (SurfaceTiling == dm_sw_linear) {
1928 MacroTileSizeBytes = 256;
1929 MacroTileHeight = BlockHeight256Bytes;
1930 } else {
1931 MacroTileSizeBytes = 65536;
1932 MacroTileHeight = 16 * BlockHeight256Bytes;
1933 }
1934 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1935
1936 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1937 if (ScanDirection != dm_vert) {
1938 *DPDE0BytesFrame = 64
1939 * (dml_ceil(
1940 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1941 / (8 * 2097152),
1942 1) + 1);
1943 } else {
1944 *DPDE0BytesFrame = 64
1945 * (dml_ceil(
1946 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1947 / (8 * 2097152),
1948 1) + 1);
1949 }
1950 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1951 } else {
1952 *DPDE0BytesFrame = 0;
1953 ExtraDPDEBytesFrame = 0;
1954 }
1955
1956 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1957
1958 #ifdef __DML_VBA_DEBUG__
1959 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1960 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1961 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1962 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1963 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1964 #endif
1965
1966 if (HostVMEnable == true) {
1967 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1968 }
1969 #ifdef __DML_VBA_DEBUG__
1970 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1971 #endif
1972
1973 if (SurfaceTiling == dm_sw_linear) {
1974 PixelPTEReqHeightPTEs = 1;
1975 *PixelPTEReqHeight = 1;
1976 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1977 *PTERequestSize = 64;
1978 FractionOfPTEReturnDrop = 0;
1979 } else if (MacroTileSizeBytes == 4096) {
1980 PixelPTEReqHeightPTEs = 1;
1981 *PixelPTEReqHeight = MacroTileHeight;
1982 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1983 *PTERequestSize = 64;
1984 if (ScanDirection != dm_vert)
1985 FractionOfPTEReturnDrop = 0;
1986 else
1987 FractionOfPTEReturnDrop = 7 / 8;
1988 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1989 PixelPTEReqHeightPTEs = 16;
1990 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1991 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1992 *PTERequestSize = 128;
1993 FractionOfPTEReturnDrop = 0;
1994 } else {
1995 PixelPTEReqHeightPTEs = 1;
1996 *PixelPTEReqHeight = MacroTileHeight;
1997 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1998 *PTERequestSize = 64;
1999 FractionOfPTEReturnDrop = 0;
2000 }
2001
2002 if (SurfaceTiling == dm_sw_linear) {
2003 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2004 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2005 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2006 } else if (ScanDirection != dm_vert) {
2007 *dpte_row_height = *PixelPTEReqHeight;
2008 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2009 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2010 } else {
2011 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
2012 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
2013 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2014 }
2015
2016 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
2017 *PTEBufferSizeNotExceeded = true;
2018 } else {
2019 *PTEBufferSizeNotExceeded = false;
2020 }
2021
2022 if (GPUVMEnable != true) {
2023 *PixelPTEBytesPerRow = 0;
2024 *PTEBufferSizeNotExceeded = true;
2025 }
2026
2027 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
2028
2029 if (HostVMEnable == true) {
2030 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2031 }
2032
2033 if (HostVMEnable == true) {
2034 *vm_group_bytes = 512;
2035 *dpte_group_bytes = 512;
2036 } else if (GPUVMEnable == true) {
2037 *vm_group_bytes = 2048;
2038 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2039 *dpte_group_bytes = 512;
2040 } else {
2041 *dpte_group_bytes = 2048;
2042 }
2043 } else {
2044 *vm_group_bytes = 0;
2045 *dpte_group_bytes = 0;
2046 }
2047 return PDEAndMetaPTEBytesFrame;
2048 }
2049
2050 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2051 {
2052 struct vba_vars_st *v = &mode_lib->vba;
2053 unsigned int j, k;
2054 double HostVMInefficiencyFactor = 1.0;
2055 bool NoChromaPlanes = true;
2056 int ReorderBytes;
2057 double VMDataOnlyReturnBW;
2058 double MaxTotalRDBandwidth = 0;
2059 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2060
2061 v->WritebackDISPCLK = 0.0;
2062 v->DISPCLKWithRamping = 0;
2063 v->DISPCLKWithoutRamping = 0;
2064 v->GlobalDPPCLK = 0.0;
2065 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
2066 {
2067 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2068 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2069 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2070 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2071 if (v->HostVMEnable != true) {
2072 v->ReturnBW = dml_min(
2073 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2074 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2075 } else {
2076 v->ReturnBW = dml_min(
2077 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2078 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2079 }
2080 }
2081 /* End DAL custom code */
2082
2083 // DISPCLK and DPPCLK Calculation
2084 //
2085 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2086 if (v->WritebackEnable[k]) {
2087 v->WritebackDISPCLK = dml_max(
2088 v->WritebackDISPCLK,
2089 dml31_CalculateWriteBackDISPCLK(
2090 v->WritebackPixelFormat[k],
2091 v->PixelClock[k],
2092 v->WritebackHRatio[k],
2093 v->WritebackVRatio[k],
2094 v->WritebackHTaps[k],
2095 v->WritebackVTaps[k],
2096 v->WritebackSourceWidth[k],
2097 v->WritebackDestinationWidth[k],
2098 v->HTotal[k],
2099 v->WritebackLineBufferSize));
2100 }
2101 }
2102
2103 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2104 if (v->HRatio[k] > 1) {
2105 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2106 v->MaxDCHUBToPSCLThroughput,
2107 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2108 } else {
2109 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2110 }
2111
2112 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2113 * dml_max(
2114 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2115 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2116
2117 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2118 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2119 }
2120
2121 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2122 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2123 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2124 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2125 } else {
2126 if (v->HRatioChroma[k] > 1) {
2127 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2128 v->MaxDCHUBToPSCLThroughput,
2129 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2130 } else {
2131 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2132 }
2133 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2134 * dml_max3(
2135 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2136 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2137 1.0);
2138
2139 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2140 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2141 }
2142
2143 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2144 }
2145 }
2146
2147 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2148 if (v->BlendingAndTiming[k] != k)
2149 continue;
2150 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2151 v->DISPCLKWithRamping = dml_max(
2152 v->DISPCLKWithRamping,
2153 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2154 * (1 + v->DISPCLKRampingMargin / 100));
2155 v->DISPCLKWithoutRamping = dml_max(
2156 v->DISPCLKWithoutRamping,
2157 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2158 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2159 v->DISPCLKWithRamping = dml_max(
2160 v->DISPCLKWithRamping,
2161 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2162 * (1 + v->DISPCLKRampingMargin / 100));
2163 v->DISPCLKWithoutRamping = dml_max(
2164 v->DISPCLKWithoutRamping,
2165 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2166 } else {
2167 v->DISPCLKWithRamping = dml_max(
2168 v->DISPCLKWithRamping,
2169 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2170 v->DISPCLKWithoutRamping = dml_max(
2171 v->DISPCLKWithoutRamping,
2172 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2173 }
2174 }
2175
2176 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2177 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2178
2179 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2180 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2181 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2182 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2183 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2184 v->DISPCLKDPPCLKVCOSpeed);
2185 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2186 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2187 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2188 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2189 } else {
2190 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2191 }
2192 v->DISPCLK = v->DISPCLK_calculated;
2193 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2194
2195 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2196 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2197 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2198 }
2199 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2200 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2201 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2202 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2203 }
2204
2205 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2206 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2207 }
2208
2209 // Urgent and B P-State/DRAM Clock Change Watermark
2210 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2211 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2212
2213 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2214 CalculateBytePerPixelAnd256BBlockSizes(
2215 v->SourcePixelFormat[k],
2216 v->SurfaceTiling[k],
2217 &v->BytePerPixelY[k],
2218 &v->BytePerPixelC[k],
2219 &v->BytePerPixelDETY[k],
2220 &v->BytePerPixelDETC[k],
2221 &v->BlockHeight256BytesY[k],
2222 &v->BlockHeight256BytesC[k],
2223 &v->BlockWidth256BytesY[k],
2224 &v->BlockWidth256BytesC[k]);
2225 }
2226
2227 CalculateSwathWidth(
2228 false,
2229 v->NumberOfActivePlanes,
2230 v->SourcePixelFormat,
2231 v->SourceScan,
2232 v->ViewportWidth,
2233 v->ViewportHeight,
2234 v->SurfaceWidthY,
2235 v->SurfaceWidthC,
2236 v->SurfaceHeightY,
2237 v->SurfaceHeightC,
2238 v->ODMCombineEnabled,
2239 v->BytePerPixelY,
2240 v->BytePerPixelC,
2241 v->BlockHeight256BytesY,
2242 v->BlockHeight256BytesC,
2243 v->BlockWidth256BytesY,
2244 v->BlockWidth256BytesC,
2245 v->BlendingAndTiming,
2246 v->HActive,
2247 v->HRatio,
2248 v->DPPPerPlane,
2249 v->SwathWidthSingleDPPY,
2250 v->SwathWidthSingleDPPC,
2251 v->SwathWidthY,
2252 v->SwathWidthC,
2253 v->dummyinteger3,
2254 v->dummyinteger4,
2255 v->swath_width_luma_ub,
2256 v->swath_width_chroma_ub);
2257
2258 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2259 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2260 * v->VRatio[k];
2261 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2262 * v->VRatioChroma[k];
2263 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2264 }
2265
2266 // DCFCLK Deep Sleep
2267 CalculateDCFCLKDeepSleep(
2268 mode_lib,
2269 v->NumberOfActivePlanes,
2270 v->BytePerPixelY,
2271 v->BytePerPixelC,
2272 v->VRatio,
2273 v->VRatioChroma,
2274 v->SwathWidthY,
2275 v->SwathWidthC,
2276 v->DPPPerPlane,
2277 v->HRatio,
2278 v->HRatioChroma,
2279 v->PixelClock,
2280 v->PSCL_THROUGHPUT_LUMA,
2281 v->PSCL_THROUGHPUT_CHROMA,
2282 v->DPPCLK,
2283 v->ReadBandwidthPlaneLuma,
2284 v->ReadBandwidthPlaneChroma,
2285 v->ReturnBusWidth,
2286 &v->DCFCLKDeepSleep);
2287
2288 // DSCCLK
2289 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2290 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2291 v->DSCCLK_calculated[k] = 0.0;
2292 } else {
2293 if (v->OutputFormat[k] == dm_420)
2294 v->DSCFormatFactor = 2;
2295 else if (v->OutputFormat[k] == dm_444)
2296 v->DSCFormatFactor = 1;
2297 else if (v->OutputFormat[k] == dm_n422)
2298 v->DSCFormatFactor = 2;
2299 else
2300 v->DSCFormatFactor = 1;
2301 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2302 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2303 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2304 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2305 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2306 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2307 else
2308 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2309 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2310 }
2311 }
2312
2313 // DSC Delay
2314 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2315 double BPP = v->OutputBpp[k];
2316
2317 if (v->DSCEnabled[k] && BPP != 0) {
2318 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2319 v->DSCDelay[k] = dscceComputeDelay(
2320 v->DSCInputBitPerComponent[k],
2321 BPP,
2322 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2323 v->NumberOfDSCSlices[k],
2324 v->OutputFormat[k],
2325 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2326 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2327 v->DSCDelay[k] = 2
2328 * (dscceComputeDelay(
2329 v->DSCInputBitPerComponent[k],
2330 BPP,
2331 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2332 v->NumberOfDSCSlices[k] / 2.0,
2333 v->OutputFormat[k],
2334 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2335 } else {
2336 v->DSCDelay[k] = 4
2337 * (dscceComputeDelay(
2338 v->DSCInputBitPerComponent[k],
2339 BPP,
2340 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2341 v->NumberOfDSCSlices[k] / 4.0,
2342 v->OutputFormat[k],
2343 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2344 }
2345 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2346 } else {
2347 v->DSCDelay[k] = 0;
2348 }
2349 }
2350
2351 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2352 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2353 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2354 v->DSCDelay[k] = v->DSCDelay[j];
2355
2356 // Prefetch
2357 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2358 unsigned int PDEAndMetaPTEBytesFrameY;
2359 unsigned int PixelPTEBytesPerRowY;
2360 unsigned int MetaRowByteY;
2361 unsigned int MetaRowByteC;
2362 unsigned int PDEAndMetaPTEBytesFrameC;
2363 unsigned int PixelPTEBytesPerRowC;
2364 bool PTEBufferSizeNotExceededY;
2365 bool PTEBufferSizeNotExceededC;
2366
2367 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2368 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2369 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2370 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2371 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2372 } else {
2373 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2374 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2375 }
2376
2377 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2378 mode_lib,
2379 v->DCCEnable[k],
2380 v->BlockHeight256BytesC[k],
2381 v->BlockWidth256BytesC[k],
2382 v->SourcePixelFormat[k],
2383 v->SurfaceTiling[k],
2384 v->BytePerPixelC[k],
2385 v->SourceScan[k],
2386 v->SwathWidthC[k],
2387 v->ViewportHeightChroma[k],
2388 v->GPUVMEnable,
2389 v->HostVMEnable,
2390 v->HostVMMaxNonCachedPageTableLevels,
2391 v->GPUVMMinPageSize,
2392 v->HostVMMinPageSize,
2393 v->PTEBufferSizeInRequestsForChroma,
2394 v->PitchC[k],
2395 v->DCCMetaPitchC[k],
2396 &v->MacroTileWidthC[k],
2397 &MetaRowByteC,
2398 &PixelPTEBytesPerRowC,
2399 &PTEBufferSizeNotExceededC,
2400 &v->dpte_row_width_chroma_ub[k],
2401 &v->dpte_row_height_chroma[k],
2402 &v->meta_req_width_chroma[k],
2403 &v->meta_req_height_chroma[k],
2404 &v->meta_row_width_chroma[k],
2405 &v->meta_row_height_chroma[k],
2406 &v->dummyinteger1,
2407 &v->dummyinteger2,
2408 &v->PixelPTEReqWidthC[k],
2409 &v->PixelPTEReqHeightC[k],
2410 &v->PTERequestSizeC[k],
2411 &v->dpde0_bytes_per_frame_ub_c[k],
2412 &v->meta_pte_bytes_per_frame_ub_c[k]);
2413
2414 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2415 mode_lib,
2416 v->VRatioChroma[k],
2417 v->VTAPsChroma[k],
2418 v->Interlace[k],
2419 v->ProgressiveToInterlaceUnitInOPP,
2420 v->SwathHeightC[k],
2421 v->ViewportYStartC[k],
2422 &v->VInitPreFillC[k],
2423 &v->MaxNumSwathC[k]);
2424 } else {
2425 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2426 v->PTEBufferSizeInRequestsForChroma = 0;
2427 PixelPTEBytesPerRowC = 0;
2428 PDEAndMetaPTEBytesFrameC = 0;
2429 MetaRowByteC = 0;
2430 v->MaxNumSwathC[k] = 0;
2431 v->PrefetchSourceLinesC[k] = 0;
2432 }
2433
2434 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2435 mode_lib,
2436 v->DCCEnable[k],
2437 v->BlockHeight256BytesY[k],
2438 v->BlockWidth256BytesY[k],
2439 v->SourcePixelFormat[k],
2440 v->SurfaceTiling[k],
2441 v->BytePerPixelY[k],
2442 v->SourceScan[k],
2443 v->SwathWidthY[k],
2444 v->ViewportHeight[k],
2445 v->GPUVMEnable,
2446 v->HostVMEnable,
2447 v->HostVMMaxNonCachedPageTableLevels,
2448 v->GPUVMMinPageSize,
2449 v->HostVMMinPageSize,
2450 v->PTEBufferSizeInRequestsForLuma,
2451 v->PitchY[k],
2452 v->DCCMetaPitchY[k],
2453 &v->MacroTileWidthY[k],
2454 &MetaRowByteY,
2455 &PixelPTEBytesPerRowY,
2456 &PTEBufferSizeNotExceededY,
2457 &v->dpte_row_width_luma_ub[k],
2458 &v->dpte_row_height[k],
2459 &v->meta_req_width[k],
2460 &v->meta_req_height[k],
2461 &v->meta_row_width[k],
2462 &v->meta_row_height[k],
2463 &v->vm_group_bytes[k],
2464 &v->dpte_group_bytes[k],
2465 &v->PixelPTEReqWidthY[k],
2466 &v->PixelPTEReqHeightY[k],
2467 &v->PTERequestSizeY[k],
2468 &v->dpde0_bytes_per_frame_ub_l[k],
2469 &v->meta_pte_bytes_per_frame_ub_l[k]);
2470
2471 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2472 mode_lib,
2473 v->VRatio[k],
2474 v->vtaps[k],
2475 v->Interlace[k],
2476 v->ProgressiveToInterlaceUnitInOPP,
2477 v->SwathHeightY[k],
2478 v->ViewportYStartY[k],
2479 &v->VInitPreFillY[k],
2480 &v->MaxNumSwathY[k]);
2481 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2482 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2483 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2484
2485 CalculateRowBandwidth(
2486 v->GPUVMEnable,
2487 v->SourcePixelFormat[k],
2488 v->VRatio[k],
2489 v->VRatioChroma[k],
2490 v->DCCEnable[k],
2491 v->HTotal[k] / v->PixelClock[k],
2492 MetaRowByteY,
2493 MetaRowByteC,
2494 v->meta_row_height[k],
2495 v->meta_row_height_chroma[k],
2496 PixelPTEBytesPerRowY,
2497 PixelPTEBytesPerRowC,
2498 v->dpte_row_height[k],
2499 v->dpte_row_height_chroma[k],
2500 &v->meta_row_bw[k],
2501 &v->dpte_row_bw[k]);
2502 }
2503
2504 v->TotalDCCActiveDPP = 0;
2505 v->TotalActiveDPP = 0;
2506 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2507 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2508 if (v->DCCEnable[k])
2509 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2510 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2511 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2512 NoChromaPlanes = false;
2513 }
2514
2515 ReorderBytes = v->NumberOfChannels
2516 * dml_max3(
2517 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2518 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2519 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2520
2521 VMDataOnlyReturnBW = dml_min(
2522 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2523 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2524 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2525 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2526
2527 #ifdef __DML_VBA_DEBUG__
2528 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2529 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2530 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2531 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2532 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2533 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2534 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2535 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2536 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2537 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2538 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2539 #endif
2540
2541 if (v->GPUVMEnable && v->HostVMEnable)
2542 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2543
2544 v->UrgentExtraLatency = CalculateExtraLatency(
2545 v->RoundTripPingLatencyCycles,
2546 ReorderBytes,
2547 v->DCFCLK,
2548 v->TotalActiveDPP,
2549 v->PixelChunkSizeInKByte,
2550 v->TotalDCCActiveDPP,
2551 v->MetaChunkSize,
2552 v->ReturnBW,
2553 v->GPUVMEnable,
2554 v->HostVMEnable,
2555 v->NumberOfActivePlanes,
2556 v->DPPPerPlane,
2557 v->dpte_group_bytes,
2558 HostVMInefficiencyFactor,
2559 v->HostVMMinPageSize,
2560 v->HostVMMaxNonCachedPageTableLevels);
2561
2562 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2563
2564 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2565 if (v->BlendingAndTiming[k] == k) {
2566 if (v->WritebackEnable[k] == true) {
2567 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2568 + CalculateWriteBackDelay(
2569 v->WritebackPixelFormat[k],
2570 v->WritebackHRatio[k],
2571 v->WritebackVRatio[k],
2572 v->WritebackVTaps[k],
2573 v->WritebackDestinationWidth[k],
2574 v->WritebackDestinationHeight[k],
2575 v->WritebackSourceHeight[k],
2576 v->HTotal[k]) / v->DISPCLK;
2577 } else
2578 v->WritebackDelay[v->VoltageLevel][k] = 0;
2579 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2580 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2581 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2582 v->WritebackDelay[v->VoltageLevel][k],
2583 v->WritebackLatency
2584 + CalculateWriteBackDelay(
2585 v->WritebackPixelFormat[j],
2586 v->WritebackHRatio[j],
2587 v->WritebackVRatio[j],
2588 v->WritebackVTaps[j],
2589 v->WritebackDestinationWidth[j],
2590 v->WritebackDestinationHeight[j],
2591 v->WritebackSourceHeight[j],
2592 v->HTotal[k]) / v->DISPCLK);
2593 }
2594 }
2595 }
2596 }
2597
2598 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2599 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2600 if (v->BlendingAndTiming[k] == j)
2601 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2602
2603 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2604 v->MaxVStartupLines[k] =
2605 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2606 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2607 v->VTotal[k] - v->VActive[k]
2608 - dml_max(
2609 1.0,
2610 dml_ceil(
2611 (double) v->WritebackDelay[v->VoltageLevel][k]
2612 / (v->HTotal[k] / v->PixelClock[k]),
2613 1));
2614 if (v->MaxVStartupLines[k] > 1023)
2615 v->MaxVStartupLines[k] = 1023;
2616
2617 #ifdef __DML_VBA_DEBUG__
2618 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2619 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2620 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2621 #endif
2622 }
2623
2624 v->MaximumMaxVStartupLines = 0;
2625 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2626 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2627
2628 // VBA_DELTA
2629 // We don't really care to iterate between the various prefetch modes
2630 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2631
2632 v->UrgentLatency = CalculateUrgentLatency(
2633 v->UrgentLatencyPixelDataOnly,
2634 v->UrgentLatencyPixelMixedWithVMData,
2635 v->UrgentLatencyVMDataOnly,
2636 v->DoUrgentLatencyAdjustment,
2637 v->UrgentLatencyAdjustmentFabricClockComponent,
2638 v->UrgentLatencyAdjustmentFabricClockReference,
2639 v->FabricClock);
2640
2641 v->FractionOfUrgentBandwidth = 0.0;
2642 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2643
2644 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2645
2646 do {
2647 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2648 bool DestinationLineTimesForPrefetchLessThan2 = false;
2649 bool VRatioPrefetchMoreThan4 = false;
2650 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2651 MaxTotalRDBandwidth = 0;
2652
2653 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2654
2655 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2656 Pipe myPipe;
2657
2658 myPipe.DPPCLK = v->DPPCLK[k];
2659 myPipe.DISPCLK = v->DISPCLK;
2660 myPipe.PixelClock = v->PixelClock[k];
2661 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2662 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2663 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2664 myPipe.SourceScan = v->SourceScan[k];
2665 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2666 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2667 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2668 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2669 myPipe.InterlaceEnable = v->Interlace[k];
2670 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2671 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2672 myPipe.HTotal = v->HTotal[k];
2673 myPipe.DCCEnable = v->DCCEnable[k];
2674 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2675 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2676 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2677 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2678 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2679 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2680 v->ErrorResult[k] = CalculatePrefetchSchedule(
2681 mode_lib,
2682 HostVMInefficiencyFactor,
2683 &myPipe,
2684 v->DSCDelay[k],
2685 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2686 v->DPPCLKDelaySCL,
2687 v->DPPCLKDelaySCLLBOnly,
2688 v->DPPCLKDelayCNVCCursor,
2689 v->DISPCLKDelaySubtotal,
2690 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2691 v->OutputFormat[k],
2692 v->MaxInterDCNTileRepeaters,
2693 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2694 v->MaxVStartupLines[k],
2695 v->GPUVMMaxPageTableLevels,
2696 v->GPUVMEnable,
2697 v->HostVMEnable,
2698 v->HostVMMaxNonCachedPageTableLevels,
2699 v->HostVMMinPageSize,
2700 v->DynamicMetadataEnable[k],
2701 v->DynamicMetadataVMEnabled,
2702 v->DynamicMetadataLinesBeforeActiveRequired[k],
2703 v->DynamicMetadataTransmittedBytes[k],
2704 v->UrgentLatency,
2705 v->UrgentExtraLatency,
2706 v->TCalc,
2707 v->PDEAndMetaPTEBytesFrame[k],
2708 v->MetaRowByte[k],
2709 v->PixelPTEBytesPerRow[k],
2710 v->PrefetchSourceLinesY[k],
2711 v->SwathWidthY[k],
2712 v->VInitPreFillY[k],
2713 v->MaxNumSwathY[k],
2714 v->PrefetchSourceLinesC[k],
2715 v->SwathWidthC[k],
2716 v->VInitPreFillC[k],
2717 v->MaxNumSwathC[k],
2718 v->swath_width_luma_ub[k],
2719 v->swath_width_chroma_ub[k],
2720 v->SwathHeightY[k],
2721 v->SwathHeightC[k],
2722 TWait,
2723 &v->DSTXAfterScaler[k],
2724 &v->DSTYAfterScaler[k],
2725 &v->DestinationLinesForPrefetch[k],
2726 &v->PrefetchBandwidth[k],
2727 &v->DestinationLinesToRequestVMInVBlank[k],
2728 &v->DestinationLinesToRequestRowInVBlank[k],
2729 &v->VRatioPrefetchY[k],
2730 &v->VRatioPrefetchC[k],
2731 &v->RequiredPrefetchPixDataBWLuma[k],
2732 &v->RequiredPrefetchPixDataBWChroma[k],
2733 &v->NotEnoughTimeForDynamicMetadata[k],
2734 &v->Tno_bw[k],
2735 &v->prefetch_vmrow_bw[k],
2736 &v->Tdmdl_vm[k],
2737 &v->Tdmdl[k],
2738 &v->TSetup[k],
2739 &v->VUpdateOffsetPix[k],
2740 &v->VUpdateWidthPix[k],
2741 &v->VReadyOffsetPix[k]);
2742
2743 #ifdef __DML_VBA_DEBUG__
2744 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2745 #endif
2746 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2747 }
2748
2749 v->NoEnoughUrgentLatencyHiding = false;
2750 v->NoEnoughUrgentLatencyHidingPre = false;
2751
2752 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2753 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2754 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2755 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2756 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2757
2758 CalculateUrgentBurstFactor(
2759 v->swath_width_luma_ub[k],
2760 v->swath_width_chroma_ub[k],
2761 v->SwathHeightY[k],
2762 v->SwathHeightC[k],
2763 v->HTotal[k] / v->PixelClock[k],
2764 v->UrgentLatency,
2765 v->CursorBufferSize,
2766 v->CursorWidth[k][0],
2767 v->CursorBPP[k][0],
2768 v->VRatio[k],
2769 v->VRatioChroma[k],
2770 v->BytePerPixelDETY[k],
2771 v->BytePerPixelDETC[k],
2772 v->DETBufferSizeY[k],
2773 v->DETBufferSizeC[k],
2774 &v->UrgBurstFactorCursor[k],
2775 &v->UrgBurstFactorLuma[k],
2776 &v->UrgBurstFactorChroma[k],
2777 &v->NoUrgentLatencyHiding[k]);
2778
2779 CalculateUrgentBurstFactor(
2780 v->swath_width_luma_ub[k],
2781 v->swath_width_chroma_ub[k],
2782 v->SwathHeightY[k],
2783 v->SwathHeightC[k],
2784 v->HTotal[k] / v->PixelClock[k],
2785 v->UrgentLatency,
2786 v->CursorBufferSize,
2787 v->CursorWidth[k][0],
2788 v->CursorBPP[k][0],
2789 v->VRatioPrefetchY[k],
2790 v->VRatioPrefetchC[k],
2791 v->BytePerPixelDETY[k],
2792 v->BytePerPixelDETC[k],
2793 v->DETBufferSizeY[k],
2794 v->DETBufferSizeC[k],
2795 &v->UrgBurstFactorCursorPre[k],
2796 &v->UrgBurstFactorLumaPre[k],
2797 &v->UrgBurstFactorChromaPre[k],
2798 &v->NoUrgentLatencyHidingPre[k]);
2799
2800 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2801 + dml_max3(
2802 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2803 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2804 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2805 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2806 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2807 v->DPPPerPlane[k]
2808 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2809 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2810 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2811
2812 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2813 + dml_max3(
2814 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2815 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2816 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2817 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2818 + v->cursor_bw_pre[k]);
2819
2820 #ifdef __DML_VBA_DEBUG__
2821 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2822 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2823 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2824 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2825 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2826
2827 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2828 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2829
2830 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2831 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2832 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2833 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2834 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2835 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2836 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2837 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2838 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2839 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2840 #endif
2841
2842 if (v->DestinationLinesForPrefetch[k] < 2)
2843 DestinationLineTimesForPrefetchLessThan2 = true;
2844
2845 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2846 VRatioPrefetchMoreThan4 = true;
2847
2848 if (v->NoUrgentLatencyHiding[k] == true)
2849 v->NoEnoughUrgentLatencyHiding = true;
2850
2851 if (v->NoUrgentLatencyHidingPre[k] == true)
2852 v->NoEnoughUrgentLatencyHidingPre = true;
2853 }
2854
2855 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2856
2857 #ifdef __DML_VBA_DEBUG__
2858 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2859 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2860 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2861 #endif
2862
2863 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2864 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2865 v->PrefetchModeSupported = true;
2866 else {
2867 v->PrefetchModeSupported = false;
2868 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2869 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2870 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2871 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2872 }
2873
2874 // PREVIOUS_ERROR
2875 // This error result check was done after the PrefetchModeSupported. So we will
2876 // still try to calculate flip schedule even prefetch mode not supported
2877 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2878 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2879 v->PrefetchModeSupported = false;
2880 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2881 }
2882 }
2883
2884 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2885 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2886 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2887 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2888 - dml_max(
2889 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2890 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2891 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2892 v->DPPPerPlane[k]
2893 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2894 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2895 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2896 }
2897
2898 v->TotImmediateFlipBytes = 0;
2899 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2900 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2901 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2902 }
2903 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2904 CalculateFlipSchedule(
2905 mode_lib,
2906 k,
2907 HostVMInefficiencyFactor,
2908 v->UrgentExtraLatency,
2909 v->UrgentLatency,
2910 v->PDEAndMetaPTEBytesFrame[k],
2911 v->MetaRowByte[k],
2912 v->PixelPTEBytesPerRow[k]);
2913 }
2914
2915 v->total_dcn_read_bw_with_flip = 0.0;
2916 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2917 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2918 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2919 + dml_max3(
2920 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2921 v->DPPPerPlane[k] * v->final_flip_bw[k]
2922 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2923 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2924 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2925 v->DPPPerPlane[k]
2926 * (v->final_flip_bw[k]
2927 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2928 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2929 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2930 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2931 + dml_max3(
2932 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2933 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2934 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2935 v->DPPPerPlane[k]
2936 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2937 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2938 }
2939 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2940
2941 v->ImmediateFlipSupported = true;
2942 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2943 #ifdef __DML_VBA_DEBUG__
2944 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2945 #endif
2946 v->ImmediateFlipSupported = false;
2947 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2948 }
2949 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2950 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2951 #ifdef __DML_VBA_DEBUG__
2952 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2953 __func__, k);
2954 #endif
2955 v->ImmediateFlipSupported = false;
2956 }
2957 }
2958 } else {
2959 v->ImmediateFlipSupported = false;
2960 }
2961
2962 v->PrefetchAndImmediateFlipSupported =
2963 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2964 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2965 v->ImmediateFlipSupported)) ? true : false;
2966 #ifdef __DML_VBA_DEBUG__
2967 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2968 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2969 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2970 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2971 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2972 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2973 #endif
2974 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2975
2976 v->VStartupLines = v->VStartupLines + 1;
2977 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2978 ASSERT(v->PrefetchAndImmediateFlipSupported);
2979
2980 // Unbounded Request Enabled
2981 CalculateUnboundedRequestAndCompressedBufferSize(
2982 v->DETBufferSizeInKByte[0],
2983 v->ConfigReturnBufferSizeInKByte,
2984 v->UseUnboundedRequesting,
2985 v->TotalActiveDPP,
2986 NoChromaPlanes,
2987 v->MaxNumDPP,
2988 v->CompressedBufferSegmentSizeInkByte,
2989 v->Output,
2990 &v->UnboundedRequestEnabled,
2991 &v->CompressedBufferSizeInkByte);
2992
2993 //Watermarks and NB P-State/DRAM Clock Change Support
2994 {
2995 enum clock_change_support DRAMClockChangeSupport; // dummy
2996 CalculateWatermarksAndDRAMSpeedChangeSupport(
2997 mode_lib,
2998 PrefetchMode,
2999 v->DCFCLK,
3000 v->ReturnBW,
3001 v->UrgentLatency,
3002 v->UrgentExtraLatency,
3003 v->SOCCLK,
3004 v->DCFCLKDeepSleep,
3005 v->DETBufferSizeY,
3006 v->DETBufferSizeC,
3007 v->SwathHeightY,
3008 v->SwathHeightC,
3009 v->SwathWidthY,
3010 v->SwathWidthC,
3011 v->DPPPerPlane,
3012 v->BytePerPixelDETY,
3013 v->BytePerPixelDETC,
3014 v->UnboundedRequestEnabled,
3015 v->CompressedBufferSizeInkByte,
3016 &DRAMClockChangeSupport,
3017 &v->StutterExitWatermark,
3018 &v->StutterEnterPlusExitWatermark,
3019 &v->Z8StutterExitWatermark,
3020 &v->Z8StutterEnterPlusExitWatermark);
3021
3022 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3023 if (v->WritebackEnable[k] == true) {
3024 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
3025 0,
3026 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3027 } else {
3028 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3029 }
3030 }
3031 }
3032
3033 //Display Pipeline Delivery Time in Prefetch, Groups
3034 CalculatePixelDeliveryTimes(
3035 v->NumberOfActivePlanes,
3036 v->VRatio,
3037 v->VRatioChroma,
3038 v->VRatioPrefetchY,
3039 v->VRatioPrefetchC,
3040 v->swath_width_luma_ub,
3041 v->swath_width_chroma_ub,
3042 v->DPPPerPlane,
3043 v->HRatio,
3044 v->HRatioChroma,
3045 v->PixelClock,
3046 v->PSCL_THROUGHPUT_LUMA,
3047 v->PSCL_THROUGHPUT_CHROMA,
3048 v->DPPCLK,
3049 v->BytePerPixelC,
3050 v->SourceScan,
3051 v->NumberOfCursors,
3052 v->CursorWidth,
3053 v->CursorBPP,
3054 v->BlockWidth256BytesY,
3055 v->BlockHeight256BytesY,
3056 v->BlockWidth256BytesC,
3057 v->BlockHeight256BytesC,
3058 v->DisplayPipeLineDeliveryTimeLuma,
3059 v->DisplayPipeLineDeliveryTimeChroma,
3060 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3061 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3062 v->DisplayPipeRequestDeliveryTimeLuma,
3063 v->DisplayPipeRequestDeliveryTimeChroma,
3064 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3065 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3066 v->CursorRequestDeliveryTime,
3067 v->CursorRequestDeliveryTimePrefetch);
3068
3069 CalculateMetaAndPTETimes(
3070 v->NumberOfActivePlanes,
3071 v->GPUVMEnable,
3072 v->MetaChunkSize,
3073 v->MinMetaChunkSizeBytes,
3074 v->HTotal,
3075 v->VRatio,
3076 v->VRatioChroma,
3077 v->DestinationLinesToRequestRowInVBlank,
3078 v->DestinationLinesToRequestRowInImmediateFlip,
3079 v->DCCEnable,
3080 v->PixelClock,
3081 v->BytePerPixelY,
3082 v->BytePerPixelC,
3083 v->SourceScan,
3084 v->dpte_row_height,
3085 v->dpte_row_height_chroma,
3086 v->meta_row_width,
3087 v->meta_row_width_chroma,
3088 v->meta_row_height,
3089 v->meta_row_height_chroma,
3090 v->meta_req_width,
3091 v->meta_req_width_chroma,
3092 v->meta_req_height,
3093 v->meta_req_height_chroma,
3094 v->dpte_group_bytes,
3095 v->PTERequestSizeY,
3096 v->PTERequestSizeC,
3097 v->PixelPTEReqWidthY,
3098 v->PixelPTEReqHeightY,
3099 v->PixelPTEReqWidthC,
3100 v->PixelPTEReqHeightC,
3101 v->dpte_row_width_luma_ub,
3102 v->dpte_row_width_chroma_ub,
3103 v->DST_Y_PER_PTE_ROW_NOM_L,
3104 v->DST_Y_PER_PTE_ROW_NOM_C,
3105 v->DST_Y_PER_META_ROW_NOM_L,
3106 v->DST_Y_PER_META_ROW_NOM_C,
3107 v->TimePerMetaChunkNominal,
3108 v->TimePerChromaMetaChunkNominal,
3109 v->TimePerMetaChunkVBlank,
3110 v->TimePerChromaMetaChunkVBlank,
3111 v->TimePerMetaChunkFlip,
3112 v->TimePerChromaMetaChunkFlip,
3113 v->time_per_pte_group_nom_luma,
3114 v->time_per_pte_group_vblank_luma,
3115 v->time_per_pte_group_flip_luma,
3116 v->time_per_pte_group_nom_chroma,
3117 v->time_per_pte_group_vblank_chroma,
3118 v->time_per_pte_group_flip_chroma);
3119
3120 CalculateVMGroupAndRequestTimes(
3121 v->NumberOfActivePlanes,
3122 v->GPUVMEnable,
3123 v->GPUVMMaxPageTableLevels,
3124 v->HTotal,
3125 v->BytePerPixelC,
3126 v->DestinationLinesToRequestVMInVBlank,
3127 v->DestinationLinesToRequestVMInImmediateFlip,
3128 v->DCCEnable,
3129 v->PixelClock,
3130 v->dpte_row_width_luma_ub,
3131 v->dpte_row_width_chroma_ub,
3132 v->vm_group_bytes,
3133 v->dpde0_bytes_per_frame_ub_l,
3134 v->dpde0_bytes_per_frame_ub_c,
3135 v->meta_pte_bytes_per_frame_ub_l,
3136 v->meta_pte_bytes_per_frame_ub_c,
3137 v->TimePerVMGroupVBlank,
3138 v->TimePerVMGroupFlip,
3139 v->TimePerVMRequestVBlank,
3140 v->TimePerVMRequestFlip);
3141
3142 // Min TTUVBlank
3143 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3144 if (PrefetchMode == 0) {
3145 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3146 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3147 v->MinTTUVBlank[k] = dml_max(
3148 v->DRAMClockChangeWatermark,
3149 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3150 } else if (PrefetchMode == 1) {
3151 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3152 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3153 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3154 } else {
3155 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3156 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3157 v->MinTTUVBlank[k] = v->UrgentWatermark;
3158 }
3159 if (!v->DynamicMetadataEnable[k])
3160 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3161 }
3162
3163 // DCC Configuration
3164 v->ActiveDPPs = 0;
3165 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3166 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3167 v->SourcePixelFormat[k],
3168 v->SurfaceWidthY[k],
3169 v->SurfaceWidthC[k],
3170 v->SurfaceHeightY[k],
3171 v->SurfaceHeightC[k],
3172 v->DETBufferSizeInKByte[0] * 1024,
3173 v->BlockHeight256BytesY[k],
3174 v->BlockHeight256BytesC[k],
3175 v->SurfaceTiling[k],
3176 v->BytePerPixelY[k],
3177 v->BytePerPixelC[k],
3178 v->BytePerPixelDETY[k],
3179 v->BytePerPixelDETC[k],
3180 v->SourceScan[k],
3181 &v->DCCYMaxUncompressedBlock[k],
3182 &v->DCCCMaxUncompressedBlock[k],
3183 &v->DCCYMaxCompressedBlock[k],
3184 &v->DCCCMaxCompressedBlock[k],
3185 &v->DCCYIndependentBlock[k],
3186 &v->DCCCIndependentBlock[k]);
3187 }
3188
3189 // VStartup Adjustment
3190 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3191 bool isInterlaceTiming;
3192 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3193 #ifdef __DML_VBA_DEBUG__
3194 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3195 #endif
3196
3197 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3198
3199 #ifdef __DML_VBA_DEBUG__
3200 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3201 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3202 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3203 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3204 #endif
3205
3206 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3207 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3208 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3209 }
3210
3211 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3212
3213 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3214 - v->VFrontPorch[k])
3215 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3216 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3217
3218 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3219
3220 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3221 <= (isInterlaceTiming ?
3222 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3223 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3224 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3225 } else {
3226 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3227 }
3228 #ifdef __DML_VBA_DEBUG__
3229 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3230 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3231 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3232 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3233 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3234 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3235 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3236 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3237 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3238 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3239 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3240 #endif
3241 }
3242
3243 {
3244 //Maximum Bandwidth Used
3245 double TotalWRBandwidth = 0;
3246 double MaxPerPlaneVActiveWRBandwidth = 0;
3247 double WRBandwidth = 0;
3248 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3249 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3250 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3251 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3252 } else if (v->WritebackEnable[k] == true) {
3253 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3254 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3255 }
3256 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3257 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3258 }
3259
3260 v->TotalDataReadBandwidth = 0;
3261 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3262 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3263 }
3264 }
3265 // Stutter Efficiency
3266 CalculateStutterEfficiency(
3267 mode_lib,
3268 v->CompressedBufferSizeInkByte,
3269 v->UnboundedRequestEnabled,
3270 v->ConfigReturnBufferSizeInKByte,
3271 v->MetaFIFOSizeInKEntries,
3272 v->ZeroSizeBufferEntries,
3273 v->NumberOfActivePlanes,
3274 v->ROBBufferSizeInKByte,
3275 v->TotalDataReadBandwidth,
3276 v->DCFCLK,
3277 v->ReturnBW,
3278 v->COMPBUF_RESERVED_SPACE_64B,
3279 v->COMPBUF_RESERVED_SPACE_ZS,
3280 v->SRExitTime,
3281 v->SRExitZ8Time,
3282 v->SynchronizedVBlank,
3283 v->StutterEnterPlusExitWatermark,
3284 v->Z8StutterEnterPlusExitWatermark,
3285 v->ProgressiveToInterlaceUnitInOPP,
3286 v->Interlace,
3287 v->MinTTUVBlank,
3288 v->DPPPerPlane,
3289 v->DETBufferSizeY,
3290 v->BytePerPixelY,
3291 v->BytePerPixelDETY,
3292 v->SwathWidthY,
3293 v->SwathHeightY,
3294 v->SwathHeightC,
3295 v->DCCRateLuma,
3296 v->DCCRateChroma,
3297 v->DCCFractionOfZeroSizeRequestsLuma,
3298 v->DCCFractionOfZeroSizeRequestsChroma,
3299 v->HTotal,
3300 v->VTotal,
3301 v->PixelClock,
3302 v->VRatio,
3303 v->SourceScan,
3304 v->BlockHeight256BytesY,
3305 v->BlockWidth256BytesY,
3306 v->BlockHeight256BytesC,
3307 v->BlockWidth256BytesC,
3308 v->DCCYMaxUncompressedBlock,
3309 v->DCCCMaxUncompressedBlock,
3310 v->VActive,
3311 v->DCCEnable,
3312 v->WritebackEnable,
3313 v->ReadBandwidthPlaneLuma,
3314 v->ReadBandwidthPlaneChroma,
3315 v->meta_row_bw,
3316 v->dpte_row_bw,
3317 &v->StutterEfficiencyNotIncludingVBlank,
3318 &v->StutterEfficiency,
3319 &v->NumberOfStutterBurstsPerFrame,
3320 &v->Z8StutterEfficiencyNotIncludingVBlank,
3321 &v->Z8StutterEfficiency,
3322 &v->Z8NumberOfStutterBurstsPerFrame,
3323 &v->StutterPeriod);
3324 }
3325
3326 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3327 {
3328 struct vba_vars_st *v = &mode_lib->vba;
3329 // Display Pipe Configuration
3330 double BytePerPixDETY[DC__NUM_DPP__MAX];
3331 double BytePerPixDETC[DC__NUM_DPP__MAX];
3332 int BytePerPixY[DC__NUM_DPP__MAX];
3333 int BytePerPixC[DC__NUM_DPP__MAX];
3334 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3335 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3336 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3337 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3338 double dummy1[DC__NUM_DPP__MAX];
3339 double dummy2[DC__NUM_DPP__MAX];
3340 double dummy3[DC__NUM_DPP__MAX];
3341 double dummy4[DC__NUM_DPP__MAX];
3342 int dummy5[DC__NUM_DPP__MAX];
3343 int dummy6[DC__NUM_DPP__MAX];
3344 bool dummy7[DC__NUM_DPP__MAX];
3345 bool dummysinglestring;
3346
3347 unsigned int k;
3348
3349 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3350
3351 CalculateBytePerPixelAnd256BBlockSizes(
3352 v->SourcePixelFormat[k],
3353 v->SurfaceTiling[k],
3354 &BytePerPixY[k],
3355 &BytePerPixC[k],
3356 &BytePerPixDETY[k],
3357 &BytePerPixDETC[k],
3358 &Read256BytesBlockHeightY[k],
3359 &Read256BytesBlockHeightC[k],
3360 &Read256BytesBlockWidthY[k],
3361 &Read256BytesBlockWidthC[k]);
3362 }
3363
3364 CalculateSwathAndDETConfiguration(
3365 false,
3366 v->NumberOfActivePlanes,
3367 v->DETBufferSizeInKByte[0],
3368 dummy1,
3369 dummy2,
3370 v->SourceScan,
3371 v->SourcePixelFormat,
3372 v->SurfaceTiling,
3373 v->ViewportWidth,
3374 v->ViewportHeight,
3375 v->SurfaceWidthY,
3376 v->SurfaceWidthC,
3377 v->SurfaceHeightY,
3378 v->SurfaceHeightC,
3379 Read256BytesBlockHeightY,
3380 Read256BytesBlockHeightC,
3381 Read256BytesBlockWidthY,
3382 Read256BytesBlockWidthC,
3383 v->ODMCombineEnabled,
3384 v->BlendingAndTiming,
3385 BytePerPixY,
3386 BytePerPixC,
3387 BytePerPixDETY,
3388 BytePerPixDETC,
3389 v->HActive,
3390 v->HRatio,
3391 v->HRatioChroma,
3392 v->DPPPerPlane,
3393 dummy5,
3394 dummy6,
3395 dummy3,
3396 dummy4,
3397 v->SwathHeightY,
3398 v->SwathHeightC,
3399 v->DETBufferSizeY,
3400 v->DETBufferSizeC,
3401 dummy7,
3402 &dummysinglestring);
3403 }
3404
3405 static bool CalculateBytePerPixelAnd256BBlockSizes(
3406 enum source_format_class SourcePixelFormat,
3407 enum dm_swizzle_mode SurfaceTiling,
3408 unsigned int *BytePerPixelY,
3409 unsigned int *BytePerPixelC,
3410 double *BytePerPixelDETY,
3411 double *BytePerPixelDETC,
3412 unsigned int *BlockHeight256BytesY,
3413 unsigned int *BlockHeight256BytesC,
3414 unsigned int *BlockWidth256BytesY,
3415 unsigned int *BlockWidth256BytesC)
3416 {
3417 if (SourcePixelFormat == dm_444_64) {
3418 *BytePerPixelDETY = 8;
3419 *BytePerPixelDETC = 0;
3420 *BytePerPixelY = 8;
3421 *BytePerPixelC = 0;
3422 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3423 *BytePerPixelDETY = 4;
3424 *BytePerPixelDETC = 0;
3425 *BytePerPixelY = 4;
3426 *BytePerPixelC = 0;
3427 } else if (SourcePixelFormat == dm_444_16) {
3428 *BytePerPixelDETY = 2;
3429 *BytePerPixelDETC = 0;
3430 *BytePerPixelY = 2;
3431 *BytePerPixelC = 0;
3432 } else if (SourcePixelFormat == dm_444_8) {
3433 *BytePerPixelDETY = 1;
3434 *BytePerPixelDETC = 0;
3435 *BytePerPixelY = 1;
3436 *BytePerPixelC = 0;
3437 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3438 *BytePerPixelDETY = 4;
3439 *BytePerPixelDETC = 1;
3440 *BytePerPixelY = 4;
3441 *BytePerPixelC = 1;
3442 } else if (SourcePixelFormat == dm_420_8) {
3443 *BytePerPixelDETY = 1;
3444 *BytePerPixelDETC = 2;
3445 *BytePerPixelY = 1;
3446 *BytePerPixelC = 2;
3447 } else if (SourcePixelFormat == dm_420_12) {
3448 *BytePerPixelDETY = 2;
3449 *BytePerPixelDETC = 4;
3450 *BytePerPixelY = 2;
3451 *BytePerPixelC = 4;
3452 } else {
3453 *BytePerPixelDETY = 4.0 / 3;
3454 *BytePerPixelDETC = 8.0 / 3;
3455 *BytePerPixelY = 2;
3456 *BytePerPixelC = 4;
3457 }
3458
3459 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3460 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3461 if (SurfaceTiling == dm_sw_linear) {
3462 *BlockHeight256BytesY = 1;
3463 } else if (SourcePixelFormat == dm_444_64) {
3464 *BlockHeight256BytesY = 4;
3465 } else if (SourcePixelFormat == dm_444_8) {
3466 *BlockHeight256BytesY = 16;
3467 } else {
3468 *BlockHeight256BytesY = 8;
3469 }
3470 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3471 *BlockHeight256BytesC = 0;
3472 *BlockWidth256BytesC = 0;
3473 } else {
3474 if (SurfaceTiling == dm_sw_linear) {
3475 *BlockHeight256BytesY = 1;
3476 *BlockHeight256BytesC = 1;
3477 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3478 *BlockHeight256BytesY = 8;
3479 *BlockHeight256BytesC = 16;
3480 } else if (SourcePixelFormat == dm_420_8) {
3481 *BlockHeight256BytesY = 16;
3482 *BlockHeight256BytesC = 8;
3483 } else {
3484 *BlockHeight256BytesY = 8;
3485 *BlockHeight256BytesC = 8;
3486 }
3487 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3488 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3489 }
3490 return true;
3491 }
3492
/*
 * Select the wait time for the given prefetch mode.
 *
 *   PrefetchMode 0: max(DRAMClockChangeLatency + UrgentLatency,
 *                       max(SREnterPlusExitTime, UrgentLatency))
 *   PrefetchMode 1: max(SREnterPlusExitTime, UrgentLatency)
 *   anything else:  UrgentLatency
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	double t_wait = UrgentLatency;

	switch (PrefetchMode) {
	case 0:
		t_wait = dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
		break;
	case 1:
		t_wait = dml_max(SREnterPlusExitTime, UrgentLatency);
		break;
	default:
		break;
	}

	return t_wait;
}
3503
3504 double dml31_CalculateWriteBackDISPCLK(
3505 enum source_format_class WritebackPixelFormat,
3506 double PixelClock,
3507 double WritebackHRatio,
3508 double WritebackVRatio,
3509 unsigned int WritebackHTaps,
3510 unsigned int WritebackVTaps,
3511 long WritebackSourceWidth,
3512 long WritebackDestinationWidth,
3513 unsigned int HTotal,
3514 unsigned int WritebackLineBufferSize)
3515 {
3516 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3517
3518 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3519 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3520 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3521 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3522 }
3523
3524 static double CalculateWriteBackDelay(
3525 enum source_format_class WritebackPixelFormat,
3526 double WritebackHRatio,
3527 double WritebackVRatio,
3528 unsigned int WritebackVTaps,
3529 int WritebackDestinationWidth,
3530 int WritebackDestinationHeight,
3531 int WritebackSourceHeight,
3532 unsigned int HTotal)
3533 {
3534 double CalculateWriteBackDelay;
3535 double Line_length;
3536 double Output_lines_last_notclamped;
3537 double WritebackVInit;
3538
3539 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3540 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3541 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3542 if (Output_lines_last_notclamped < 0) {
3543 CalculateWriteBackDelay = 0;
3544 } else {
3545 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3546 }
3547 return CalculateWriteBackDelay;
3548 }
3549
/*
 * Derive the VUpdate / VReady pixel quantities and the dynamic metadata
 * timing terms for one timing.
 *
 * With RepeaterDelay = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK),
 * the outputs are:
 *   *VUpdateWidthPix  = dml_ceil((14 / DCFClkDeepSleep + 12 / DPPCLK + RepeaterDelay) * PixelClock, 1)
 *   *VReadyOffsetPix  = dml_ceil(max(150 / DPPCLK,
 *                                    RepeaterDelay + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock, 1)
 *   *VUpdateOffsetPix = dml_ceil(HTotal / 4, 1)
 *   *TSetup           = (sum of the three values above) / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = VBlank * HTotal / PixelClock / 2 when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *                       DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *                       halved again when InterlaceEnable is 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false.
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f; only VUpdateOffsetPix is an int. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3593
3594 static void CalculateRowBandwidth(
3595 bool GPUVMEnable,
3596 enum source_format_class SourcePixelFormat,
3597 double VRatio,
3598 double VRatioChroma,
3599 bool DCCEnable,
3600 double LineTime,
3601 unsigned int MetaRowByteLuma,
3602 unsigned int MetaRowByteChroma,
3603 unsigned int meta_row_height_luma,
3604 unsigned int meta_row_height_chroma,
3605 unsigned int PixelPTEBytesPerRowLuma,
3606 unsigned int PixelPTEBytesPerRowChroma,
3607 unsigned int dpte_row_height_luma,
3608 unsigned int dpte_row_height_chroma,
3609 double *meta_row_bw,
3610 double *dpte_row_bw)
3611 {
3612 if (DCCEnable != true) {
3613 *meta_row_bw = 0;
3614 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3615 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3616 } else {
3617 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3618 }
3619
3620 if (GPUVMEnable != true) {
3621 *dpte_row_bw = 0;
3622 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3623 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3624 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3625 } else {
3626 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3627 }
3628 }
3629
3630 static void CalculateFlipSchedule(
3631 struct display_mode_lib *mode_lib,
3632 unsigned int k,
3633 double HostVMInefficiencyFactor,
3634 double UrgentExtraLatency,
3635 double UrgentLatency,
3636 double PDEAndMetaPTEBytesPerFrame,
3637 double MetaRowBytes,
3638 double DPTEBytesPerRow)
3639 {
3640 struct vba_vars_st *v = &mode_lib->vba;
3641 double min_row_time = 0.0;
3642 unsigned int HostVMDynamicLevelsTrips;
3643 double TimeForFetchingMetaPTEImmediateFlip;
3644 double TimeForFetchingRowInVBlankImmediateFlip;
3645 double ImmediateFlipBW;
3646 double LineTime = v->HTotal[k] / v->PixelClock[k];
3647
3648 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3649 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3650 } else {
3651 HostVMDynamicLevelsTrips = 0;
3652 }
3653
3654 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3655 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3656 }
3657
3658 if (v->GPUVMEnable == true) {
3659 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3660 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3661 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3662 LineTime / 4.0);
3663 } else {
3664 TimeForFetchingMetaPTEImmediateFlip = 0;
3665 }
3666
3667 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3668 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3669 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3670 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3671 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3672 LineTime / 4);
3673 } else {
3674 TimeForFetchingRowInVBlankImmediateFlip = 0;
3675 }
3676
3677 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3678
3679 if (v->GPUVMEnable == true) {
3680 v->final_flip_bw[k] = dml_max(
3681 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3682 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3683 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3684 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3685 } else {
3686 v->final_flip_bw[k] = 0;
3687 }
3688
3689 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3690 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3691 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3692 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3693 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3694 } else {
3695 min_row_time = dml_min4(
3696 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3697 v->meta_row_height[k] * LineTime / v->VRatio[k],
3698 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3699 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3700 }
3701 } else {
3702 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3703 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3704 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3705 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3706 } else {
3707 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3708 }
3709 }
3710
3711 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3712 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3713 v->ImmediateFlipSupportedForPipe[k] = false;
3714 } else {
3715 v->ImmediateFlipSupportedForPipe[k] = true;
3716 }
3717
3718 #ifdef __DML_VBA_DEBUG__
3719 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3720 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3721 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3722 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3723 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3724 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3725 #endif
3726
3727 }
3728
3729 static double TruncToValidBPP(
3730 double LinkBitRate,
3731 int Lanes,
3732 int HTotal,
3733 int HActive,
3734 double PixelClock,
3735 double DesiredBPP,
3736 bool DSCEnable,
3737 enum output_encoder_class Output,
3738 enum output_format_class Format,
3739 unsigned int DSCInputBitPerComponent,
3740 int DSCSlices,
3741 int AudioRate,
3742 int AudioLayout,
3743 enum odm_combine_mode ODMCombine)
3744 {
3745 double MaxLinkBPP;
3746 int MinDSCBPP;
3747 double MaxDSCBPP;
3748 int NonDSCBPP0;
3749 int NonDSCBPP1;
3750 int NonDSCBPP2;
3751
3752 if (Format == dm_420) {
3753 NonDSCBPP0 = 12;
3754 NonDSCBPP1 = 15;
3755 NonDSCBPP2 = 18;
3756 MinDSCBPP = 6;
3757 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3758 } else if (Format == dm_444) {
3759 NonDSCBPP0 = 24;
3760 NonDSCBPP1 = 30;
3761 NonDSCBPP2 = 36;
3762 MinDSCBPP = 8;
3763 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3764 } else {
3765 if (Output == dm_hdmi) {
3766 NonDSCBPP0 = 24;
3767 NonDSCBPP1 = 24;
3768 NonDSCBPP2 = 24;
3769 } else {
3770 NonDSCBPP0 = 16;
3771 NonDSCBPP1 = 20;
3772 NonDSCBPP2 = 24;
3773 }
3774 if (Format == dm_n422) {
3775 MinDSCBPP = 7;
3776 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3777 } else {
3778 MinDSCBPP = 8;
3779 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3780 }
3781 }
3782
3783 if (DSCEnable && Output == dm_dp) {
3784 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3785 } else {
3786 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3787 }
3788
3789 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3790 MaxLinkBPP = 16;
3791 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3792 MaxLinkBPP = 32;
3793 }
3794
3795 if (DesiredBPP == 0) {
3796 if (DSCEnable) {
3797 if (MaxLinkBPP < MinDSCBPP) {
3798 return BPP_INVALID;
3799 } else if (MaxLinkBPP >= MaxDSCBPP) {
3800 return MaxDSCBPP;
3801 } else {
3802 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3803 }
3804 } else {
3805 if (MaxLinkBPP >= NonDSCBPP2) {
3806 return NonDSCBPP2;
3807 } else if (MaxLinkBPP >= NonDSCBPP1) {
3808 return NonDSCBPP1;
3809 } else if (MaxLinkBPP >= NonDSCBPP0) {
3810 return 16.0;
3811 } else {
3812 return BPP_INVALID;
3813 }
3814 }
3815 } else {
3816 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3817 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3818 return BPP_INVALID;
3819 } else {
3820 return DesiredBPP;
3821 }
3822 }
3823 return BPP_INVALID;
3824 }
3825
3826 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3827 {
3828 struct vba_vars_st *v = &mode_lib->vba;
3829
3830 int i, j;
3831 unsigned int k, m;
3832 int ReorderingBytes;
3833 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3834 bool NoChroma = true;
3835 bool EnoughWritebackUnits = true;
3836 bool P2IWith420 = false;
3837 bool DSCOnlyIfNecessaryWithBPP = false;
3838 bool DSC422NativeNotSupported = false;
3839 double MaxTotalVActiveRDBandwidth;
3840 bool ViewportExceedsSurface = false;
3841 bool FMTBufferExceeded = false;
3842
3843 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3844
3845 CalculateMinAndMaxPrefetchMode(
3846 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3847 &MinPrefetchMode, &MaxPrefetchMode);
3848
3849 /*Scale Ratio, taps Support Check*/
3850
3851 v->ScaleRatioAndTapsSupport = true;
3852 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3853 if (v->ScalerEnabled[k] == false
3854 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3855 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3856 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3857 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3858 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3859 v->ScaleRatioAndTapsSupport = false;
3860 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3861 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3862 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3863 || v->VRatio[k] > v->vtaps[k]
3864 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3865 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3866 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3867 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3868 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3869 || v->HRatioChroma[k] > v->MaxHSCLRatio
3870 || v->VRatioChroma[k] > v->MaxVSCLRatio
3871 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3872 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3873 v->ScaleRatioAndTapsSupport = false;
3874 }
3875 }
3876 /*Source Format, Pixel Format and Scan Support Check*/
3877
3878 v->SourceFormatPixelAndScanSupport = true;
3879 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3880 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3881 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3882 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3883 v->SourceFormatPixelAndScanSupport = false;
3884 }
3885 }
3886 /*Bandwidth Support Check*/
3887
3888 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3889 CalculateBytePerPixelAnd256BBlockSizes(
3890 v->SourcePixelFormat[k],
3891 v->SurfaceTiling[k],
3892 &v->BytePerPixelY[k],
3893 &v->BytePerPixelC[k],
3894 &v->BytePerPixelInDETY[k],
3895 &v->BytePerPixelInDETC[k],
3896 &v->Read256BlockHeightY[k],
3897 &v->Read256BlockHeightC[k],
3898 &v->Read256BlockWidthY[k],
3899 &v->Read256BlockWidthC[k]);
3900 }
3901 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3902 if (v->SourceScan[k] != dm_vert) {
3903 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3904 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3905 } else {
3906 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3907 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3908 }
3909 }
3910 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3911 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3912 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3913 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3914 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3915 }
3916 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3917 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3918 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3919 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3920 } else if (v->WritebackEnable[k] == true) {
3921 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3922 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3923 } else {
3924 v->WriteBandwidth[k] = 0.0;
3925 }
3926 }
3927
3928 /*Writeback Latency support check*/
3929
3930 v->WritebackLatencySupport = true;
3931 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3932 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3933 v->WritebackLatencySupport = false;
3934 }
3935 }
3936
3937 /*Writeback Mode Support Check*/
3938
3939 v->TotalNumberOfActiveWriteback = 0;
3940 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3941 if (v->WritebackEnable[k] == true) {
3942 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3943 }
3944 }
3945
3946 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3947 EnoughWritebackUnits = false;
3948 }
3949
3950 /*Writeback Scale Ratio and Taps Support Check*/
3951
3952 v->WritebackScaleRatioAndTapsSupport = true;
3953 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3954 if (v->WritebackEnable[k] == true) {
3955 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
3956 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
3957 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
3958 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
3959 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
3960 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
3961 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
3962 v->WritebackScaleRatioAndTapsSupport = false;
3963 }
3964 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3965 v->WritebackScaleRatioAndTapsSupport = false;
3966 }
3967 }
3968 }
3969 /*Maximum DISPCLK/DPPCLK Support check*/
3970
3971 v->WritebackRequiredDISPCLK = 0.0;
3972 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3973 if (v->WritebackEnable[k] == true) {
3974 v->WritebackRequiredDISPCLK = dml_max(
3975 v->WritebackRequiredDISPCLK,
3976 dml31_CalculateWriteBackDISPCLK(
3977 v->WritebackPixelFormat[k],
3978 v->PixelClock[k],
3979 v->WritebackHRatio[k],
3980 v->WritebackVRatio[k],
3981 v->WritebackHTaps[k],
3982 v->WritebackVTaps[k],
3983 v->WritebackSourceWidth[k],
3984 v->WritebackDestinationWidth[k],
3985 v->HTotal[k],
3986 v->WritebackLineBufferSize));
3987 }
3988 }
3989 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3990 if (v->HRatio[k] > 1.0) {
3991 v->PSCL_FACTOR[k] = dml_min(
3992 v->MaxDCHUBToPSCLThroughput,
3993 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3994 } else {
3995 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3996 }
3997 if (v->BytePerPixelC[k] == 0.0) {
3998 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3999 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4000 * dml_max3(
4001 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4002 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4003 1.0);
4004 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4005 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4006 }
4007 } else {
4008 if (v->HRatioChroma[k] > 1.0) {
4009 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4010 v->MaxDCHUBToPSCLThroughput,
4011 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4012 } else {
4013 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4014 }
4015 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4016 * dml_max5(
4017 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4018 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4019 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4020 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4021 1.0);
4022 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4023 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4024 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4025 }
4026 }
4027 }
4028 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4029 int MaximumSwathWidthSupportLuma;
4030 int MaximumSwathWidthSupportChroma;
4031
4032 if (v->SurfaceTiling[k] == dm_sw_linear) {
4033 MaximumSwathWidthSupportLuma = 8192.0;
4034 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4035 MaximumSwathWidthSupportLuma = 2880.0;
4036 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4037 MaximumSwathWidthSupportLuma = 3840.0;
4038 } else {
4039 MaximumSwathWidthSupportLuma = 5760.0;
4040 }
4041
4042 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4043 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4044 } else {
4045 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4046 }
4047 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4048 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4049 if (v->BytePerPixelC[k] == 0.0) {
4050 v->MaximumSwathWidthInLineBufferChroma = 0;
4051 } else {
4052 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4053 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4054 }
4055 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4056 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4057 }
4058
4059 CalculateSwathAndDETConfiguration(
4060 true,
4061 v->NumberOfActivePlanes,
4062 v->DETBufferSizeInKByte[0],
4063 v->MaximumSwathWidthLuma,
4064 v->MaximumSwathWidthChroma,
4065 v->SourceScan,
4066 v->SourcePixelFormat,
4067 v->SurfaceTiling,
4068 v->ViewportWidth,
4069 v->ViewportHeight,
4070 v->SurfaceWidthY,
4071 v->SurfaceWidthC,
4072 v->SurfaceHeightY,
4073 v->SurfaceHeightC,
4074 v->Read256BlockHeightY,
4075 v->Read256BlockHeightC,
4076 v->Read256BlockWidthY,
4077 v->Read256BlockWidthC,
4078 v->odm_combine_dummy,
4079 v->BlendingAndTiming,
4080 v->BytePerPixelY,
4081 v->BytePerPixelC,
4082 v->BytePerPixelInDETY,
4083 v->BytePerPixelInDETC,
4084 v->HActive,
4085 v->HRatio,
4086 v->HRatioChroma,
4087 v->NoOfDPPThisState,
4088 v->swath_width_luma_ub_this_state,
4089 v->swath_width_chroma_ub_this_state,
4090 v->SwathWidthYThisState,
4091 v->SwathWidthCThisState,
4092 v->SwathHeightYThisState,
4093 v->SwathHeightCThisState,
4094 v->DETBufferSizeYThisState,
4095 v->DETBufferSizeCThisState,
4096 v->SingleDPPViewportSizeSupportPerPlane,
4097 &v->ViewportSizeSupport[0][0]);
4098
4099 for (i = 0; i < v->soc.num_states; i++) {
4100 for (j = 0; j < 2; j++) {
4101 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4102 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4103 v->RequiredDISPCLK[i][j] = 0.0;
4104 v->DISPCLK_DPPCLK_Support[i][j] = true;
4105 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4106 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4107 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4108 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4109 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4110 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4111 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4112 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4113 }
4114 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4115 * (1 + v->DISPCLKRampingMargin / 100.0);
4116 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4117 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4118 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4119 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4120 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4121 }
4122 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4123 * (1 + v->DISPCLKRampingMargin / 100.0);
4124 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4125 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4126 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4127 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4128 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4129 }
4130
4131 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4132 || !(v->Output[k] == dm_dp ||
4133 v->Output[k] == dm_edp)) {
4134 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4135 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4136
4137 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4138 FMTBufferExceeded = true;
4139 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4140 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4141 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4142 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4143 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4144 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4145 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4146 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4147 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4148 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4149 } else {
4150 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4151 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4152 }
4153 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4154 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4155 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4156 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4157 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4158 } else {
4159 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4160 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4161 }
4162 }
4163 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4164 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4165 if (v->Output[k] == dm_hdmi) {
4166 FMTBufferExceeded = true;
4167 } else if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4168 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4169 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4170
4171 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4172 FMTBufferExceeded = true;
4173 } else {
4174 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4175 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4176 }
4177 }
4178 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4179 v->MPCCombine[i][j][k] = false;
4180 v->NoOfDPP[i][j][k] = 4;
4181 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4182 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4183 v->MPCCombine[i][j][k] = false;
4184 v->NoOfDPP[i][j][k] = 2;
4185 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4186 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4187 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4188 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4189 v->MPCCombine[i][j][k] = false;
4190 v->NoOfDPP[i][j][k] = 1;
4191 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4192 } else {
4193 v->MPCCombine[i][j][k] = true;
4194 v->NoOfDPP[i][j][k] = 2;
4195 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4196 }
4197 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4198 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4199 > v->MaxDppclkRoundedDownToDFSGranularity)
4200 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4201 v->DISPCLK_DPPCLK_Support[i][j] = false;
4202 }
4203 }
4204 v->TotalNumberOfActiveDPP[i][j] = 0;
4205 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4206 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4207 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4208 if (v->NoOfDPP[i][j][k] == 1)
4209 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4210 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4211 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4212 NoChroma = false;
4213 }
4214
4215 // UPTO
4216 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4217 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4218 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4219 double BWOfNonSplitPlaneOfMaximumBandwidth;
4220 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4221 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4222 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4223 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4224 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4225 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4226 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4227 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4228 }
4229 }
4230 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4231 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4232 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4233 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4234 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4235 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4236 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4237 }
4238 }
4239 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4240 v->RequiredDISPCLK[i][j] = 0.0;
4241 v->DISPCLK_DPPCLK_Support[i][j] = true;
4242 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4243 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4244 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4245 v->MPCCombine[i][j][k] = true;
4246 v->NoOfDPP[i][j][k] = 2;
4247 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4248 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4249 } else {
4250 v->MPCCombine[i][j][k] = false;
4251 v->NoOfDPP[i][j][k] = 1;
4252 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4253 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4254 }
4255 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4256 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4257 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4258 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4259 } else {
4260 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4261 }
4262 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4263 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4264 > v->MaxDppclkRoundedDownToDFSGranularity)
4265 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4266 v->DISPCLK_DPPCLK_Support[i][j] = false;
4267 }
4268 }
4269 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4270 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4271 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4272 }
4273 }
4274 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4275 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4276 v->DISPCLK_DPPCLK_Support[i][j] = false;
4277 }
4278 }
4279 }
4280
4281 /*Total Available Pipes Support Check*/
4282
4283 for (i = 0; i < v->soc.num_states; i++) {
4284 for (j = 0; j < 2; j++) {
4285 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4286 v->TotalAvailablePipesSupport[i][j] = true;
4287 } else {
4288 v->TotalAvailablePipesSupport[i][j] = false;
4289 }
4290 }
4291 }
4292 /*Display IO and DSC Support Check*/
4293
4294 v->NonsupportedDSCInputBPC = false;
4295 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4296 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4297 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4298 v->NonsupportedDSCInputBPC = true;
4299 }
4300 }
4301
4302 /*Number Of DSC Slices*/
4303 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4304 if (v->BlendingAndTiming[k] == k) {
4305 if (v->PixelClockBackEnd[k] > 3200) {
4306 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4307 } else if (v->PixelClockBackEnd[k] > 1360) {
4308 v->NumberOfDSCSlices[k] = 8;
4309 } else if (v->PixelClockBackEnd[k] > 680) {
4310 v->NumberOfDSCSlices[k] = 4;
4311 } else if (v->PixelClockBackEnd[k] > 340) {
4312 v->NumberOfDSCSlices[k] = 2;
4313 } else {
4314 v->NumberOfDSCSlices[k] = 1;
4315 }
4316 } else {
4317 v->NumberOfDSCSlices[k] = 0;
4318 }
4319 }
4320
4321 for (i = 0; i < v->soc.num_states; i++) {
4322 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4323 v->RequiresDSC[i][k] = false;
4324 v->RequiresFEC[i][k] = false;
4325 if (v->BlendingAndTiming[k] == k) {
4326 if (v->Output[k] == dm_hdmi) {
4327 v->RequiresDSC[i][k] = false;
4328 v->RequiresFEC[i][k] = false;
4329 v->OutputBppPerState[i][k] = TruncToValidBPP(
4330 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4331 3,
4332 v->HTotal[k],
4333 v->HActive[k],
4334 v->PixelClockBackEnd[k],
4335 v->ForcedOutputLinkBPP[k],
4336 false,
4337 v->Output[k],
4338 v->OutputFormat[k],
4339 v->DSCInputBitPerComponent[k],
4340 v->NumberOfDSCSlices[k],
4341 v->AudioSampleRate[k],
4342 v->AudioSampleLayout[k],
4343 v->ODMCombineEnablePerState[i][k]);
4344 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4345 if (v->DSCEnable[k] == true) {
4346 v->RequiresDSC[i][k] = true;
4347 v->LinkDSCEnable = true;
4348 if (v->Output[k] == dm_dp) {
4349 v->RequiresFEC[i][k] = true;
4350 } else {
4351 v->RequiresFEC[i][k] = false;
4352 }
4353 } else {
4354 v->RequiresDSC[i][k] = false;
4355 v->LinkDSCEnable = false;
4356 v->RequiresFEC[i][k] = false;
4357 }
4358
4359 v->Outbpp = BPP_INVALID;
4360 if (v->PHYCLKPerState[i] >= 270.0) {
4361 v->Outbpp = TruncToValidBPP(
4362 (1.0 - v->Downspreading / 100.0) * 2700,
4363 v->OutputLinkDPLanes[k],
4364 v->HTotal[k],
4365 v->HActive[k],
4366 v->PixelClockBackEnd[k],
4367 v->ForcedOutputLinkBPP[k],
4368 v->LinkDSCEnable,
4369 v->Output[k],
4370 v->OutputFormat[k],
4371 v->DSCInputBitPerComponent[k],
4372 v->NumberOfDSCSlices[k],
4373 v->AudioSampleRate[k],
4374 v->AudioSampleLayout[k],
4375 v->ODMCombineEnablePerState[i][k]);
4376 v->OutputBppPerState[i][k] = v->Outbpp;
4377 // TODO: Need some other way to handle this nonsense
4378 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4379 }
4380 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4381 v->Outbpp = TruncToValidBPP(
4382 (1.0 - v->Downspreading / 100.0) * 5400,
4383 v->OutputLinkDPLanes[k],
4384 v->HTotal[k],
4385 v->HActive[k],
4386 v->PixelClockBackEnd[k],
4387 v->ForcedOutputLinkBPP[k],
4388 v->LinkDSCEnable,
4389 v->Output[k],
4390 v->OutputFormat[k],
4391 v->DSCInputBitPerComponent[k],
4392 v->NumberOfDSCSlices[k],
4393 v->AudioSampleRate[k],
4394 v->AudioSampleLayout[k],
4395 v->ODMCombineEnablePerState[i][k]);
4396 v->OutputBppPerState[i][k] = v->Outbpp;
4397 // TODO: Need some other way to handle this nonsense
4398 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4399 }
4400 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4401 v->Outbpp = TruncToValidBPP(
4402 (1.0 - v->Downspreading / 100.0) * 8100,
4403 v->OutputLinkDPLanes[k],
4404 v->HTotal[k],
4405 v->HActive[k],
4406 v->PixelClockBackEnd[k],
4407 v->ForcedOutputLinkBPP[k],
4408 v->LinkDSCEnable,
4409 v->Output[k],
4410 v->OutputFormat[k],
4411 v->DSCInputBitPerComponent[k],
4412 v->NumberOfDSCSlices[k],
4413 v->AudioSampleRate[k],
4414 v->AudioSampleLayout[k],
4415 v->ODMCombineEnablePerState[i][k]);
4416 v->OutputBppPerState[i][k] = v->Outbpp;
4417 // TODO: Need some other way to handle this nonsense
4418 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4419 }
4420 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4421 v->Outbpp = TruncToValidBPP(
4422 (1.0 - v->Downspreading / 100.0) * 10000,
4423 4,
4424 v->HTotal[k],
4425 v->HActive[k],
4426 v->PixelClockBackEnd[k],
4427 v->ForcedOutputLinkBPP[k],
4428 v->LinkDSCEnable,
4429 v->Output[k],
4430 v->OutputFormat[k],
4431 v->DSCInputBitPerComponent[k],
4432 v->NumberOfDSCSlices[k],
4433 v->AudioSampleRate[k],
4434 v->AudioSampleLayout[k],
4435 v->ODMCombineEnablePerState[i][k]);
4436 v->OutputBppPerState[i][k] = v->Outbpp;
4437 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4438 }
4439 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4440 v->Outbpp = TruncToValidBPP(
4441 12000,
4442 4,
4443 v->HTotal[k],
4444 v->HActive[k],
4445 v->PixelClockBackEnd[k],
4446 v->ForcedOutputLinkBPP[k],
4447 v->LinkDSCEnable,
4448 v->Output[k],
4449 v->OutputFormat[k],
4450 v->DSCInputBitPerComponent[k],
4451 v->NumberOfDSCSlices[k],
4452 v->AudioSampleRate[k],
4453 v->AudioSampleLayout[k],
4454 v->ODMCombineEnablePerState[i][k]);
4455 v->OutputBppPerState[i][k] = v->Outbpp;
4456 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4457 }
4458 }
4459 } else {
4460 v->OutputBppPerState[i][k] = 0;
4461 }
4462 }
4463 }
4464
4465 for (i = 0; i < v->soc.num_states; i++) {
4466 v->LinkCapacitySupport[i] = true;
4467 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4468 if (v->BlendingAndTiming[k] == k
4469 && (v->Output[k] == dm_dp ||
4470 v->Output[k] == dm_edp ||
4471 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4472 v->LinkCapacitySupport[i] = false;
4473 }
4474 }
4475 }
4476
4477 // UPTO 2172
4478 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4479 if (v->BlendingAndTiming[k] == k
4480 && (v->Output[k] == dm_dp ||
4481 v->Output[k] == dm_edp ||
4482 v->Output[k] == dm_hdmi)) {
4483 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4484 P2IWith420 = true;
4485 }
4486 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4487 && !v->DSC422NativeSupport) {
4488 DSC422NativeNotSupported = true;
4489 }
4490 }
4491 }
4492
4493 for (i = 0; i < v->soc.num_states; ++i) {
4494 v->ODMCombine4To1SupportCheckOK[i] = true;
4495 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4496 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4497 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4498 || v->Output[k] == dm_hdmi)) {
4499 v->ODMCombine4To1SupportCheckOK[i] = false;
4500 }
4501 }
4502 }
4503
4504 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4505
4506 for (i = 0; i < v->soc.num_states; i++) {
4507 v->NotEnoughDSCUnits[i] = false;
4508 v->TotalDSCUnitsRequired = 0.0;
4509 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4510 if (v->RequiresDSC[i][k] == true) {
4511 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4512 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4513 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4514 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4515 } else {
4516 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4517 }
4518 }
4519 }
4520 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4521 v->NotEnoughDSCUnits[i] = true;
4522 }
4523 }
4524 /*DSC Delay per state*/
4525
4526 for (i = 0; i < v->soc.num_states; i++) {
4527 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4528 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4529 v->BPP = 0.0;
4530 } else {
4531 v->BPP = v->OutputBppPerState[i][k];
4532 }
4533 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4534 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4535 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4536 v->DSCInputBitPerComponent[k],
4537 v->BPP,
4538 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4539 v->NumberOfDSCSlices[k],
4540 v->OutputFormat[k],
4541 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4542 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4543 v->DSCDelayPerState[i][k] = 2.0
4544 * (dscceComputeDelay(
4545 v->DSCInputBitPerComponent[k],
4546 v->BPP,
4547 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4548 v->NumberOfDSCSlices[k] / 2,
4549 v->OutputFormat[k],
4550 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4551 } else {
4552 v->DSCDelayPerState[i][k] = 4.0
4553 * (dscceComputeDelay(
4554 v->DSCInputBitPerComponent[k],
4555 v->BPP,
4556 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4557 v->NumberOfDSCSlices[k] / 4,
4558 v->OutputFormat[k],
4559 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4560 }
4561 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4562 } else {
4563 v->DSCDelayPerState[i][k] = 0.0;
4564 }
4565 }
4566 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4567 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4568 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4569 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4570 }
4571 }
4572 }
4573 }
4574
4575 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4576 //
4577 for (i = 0; i < v->soc.num_states; ++i) {
4578 for (j = 0; j <= 1; ++j) {
4579 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4580 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4581 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4582 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4583 }
4584
4585 CalculateSwathAndDETConfiguration(
4586 false,
4587 v->NumberOfActivePlanes,
4588 v->DETBufferSizeInKByte[0],
4589 v->MaximumSwathWidthLuma,
4590 v->MaximumSwathWidthChroma,
4591 v->SourceScan,
4592 v->SourcePixelFormat,
4593 v->SurfaceTiling,
4594 v->ViewportWidth,
4595 v->ViewportHeight,
4596 v->SurfaceWidthY,
4597 v->SurfaceWidthC,
4598 v->SurfaceHeightY,
4599 v->SurfaceHeightC,
4600 v->Read256BlockHeightY,
4601 v->Read256BlockHeightC,
4602 v->Read256BlockWidthY,
4603 v->Read256BlockWidthC,
4604 v->ODMCombineEnableThisState,
4605 v->BlendingAndTiming,
4606 v->BytePerPixelY,
4607 v->BytePerPixelC,
4608 v->BytePerPixelInDETY,
4609 v->BytePerPixelInDETC,
4610 v->HActive,
4611 v->HRatio,
4612 v->HRatioChroma,
4613 v->NoOfDPPThisState,
4614 v->swath_width_luma_ub_this_state,
4615 v->swath_width_chroma_ub_this_state,
4616 v->SwathWidthYThisState,
4617 v->SwathWidthCThisState,
4618 v->SwathHeightYThisState,
4619 v->SwathHeightCThisState,
4620 v->DETBufferSizeYThisState,
4621 v->DETBufferSizeCThisState,
4622 v->dummystring,
4623 &v->ViewportSizeSupport[i][j]);
4624
4625 CalculateDCFCLKDeepSleep(
4626 mode_lib,
4627 v->NumberOfActivePlanes,
4628 v->BytePerPixelY,
4629 v->BytePerPixelC,
4630 v->VRatio,
4631 v->VRatioChroma,
4632 v->SwathWidthYThisState,
4633 v->SwathWidthCThisState,
4634 v->NoOfDPPThisState,
4635 v->HRatio,
4636 v->HRatioChroma,
4637 v->PixelClock,
4638 v->PSCL_FACTOR,
4639 v->PSCL_FACTOR_CHROMA,
4640 v->RequiredDPPCLKThisState,
4641 v->ReadBandwidthLuma,
4642 v->ReadBandwidthChroma,
4643 v->ReturnBusWidth,
4644 &v->ProjectedDCFCLKDeepSleep[i][j]);
4645
4646 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4647 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4648 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4649 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4650 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4651 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4652 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4653 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4654 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4655 }
4656 }
4657 }
4658
4659 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4660 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4661 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4662 }
4663
4664 for (i = 0; i < v->soc.num_states; i++) {
4665 for (j = 0; j < 2; j++) {
4666 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4667
4668 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4669 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4670 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4671 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4672 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4673 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4674 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4675 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4676 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4677 }
4678
4679 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4680 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4681 if (v->DCCEnable[k] == true) {
4682 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4683 }
4684 }
4685
4686 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4687 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4688 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4689
4690 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4691 && v->SourceScan[k] != dm_vert) {
4692 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4693 / 2;
4694 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4695 } else {
4696 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4697 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4698 }
4699
4700 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4701 mode_lib,
4702 v->DCCEnable[k],
4703 v->Read256BlockHeightC[k],
4704 v->Read256BlockWidthC[k],
4705 v->SourcePixelFormat[k],
4706 v->SurfaceTiling[k],
4707 v->BytePerPixelC[k],
4708 v->SourceScan[k],
4709 v->SwathWidthCThisState[k],
4710 v->ViewportHeightChroma[k],
4711 v->GPUVMEnable,
4712 v->HostVMEnable,
4713 v->HostVMMaxNonCachedPageTableLevels,
4714 v->GPUVMMinPageSize,
4715 v->HostVMMinPageSize,
4716 v->PTEBufferSizeInRequestsForChroma,
4717 v->PitchC[k],
4718 0.0,
4719 &v->MacroTileWidthC[k],
4720 &v->MetaRowBytesC,
4721 &v->DPTEBytesPerRowC,
4722 &v->PTEBufferSizeNotExceededC[i][j][k],
4723 &v->dummyinteger7,
4724 &v->dpte_row_height_chroma[k],
4725 &v->dummyinteger28,
4726 &v->dummyinteger26,
4727 &v->dummyinteger23,
4728 &v->meta_row_height_chroma[k],
4729 &v->dummyinteger8,
4730 &v->dummyinteger9,
4731 &v->dummyinteger19,
4732 &v->dummyinteger20,
4733 &v->dummyinteger17,
4734 &v->dummyinteger10,
4735 &v->dummyinteger11);
4736
4737 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4738 mode_lib,
4739 v->VRatioChroma[k],
4740 v->VTAPsChroma[k],
4741 v->Interlace[k],
4742 v->ProgressiveToInterlaceUnitInOPP,
4743 v->SwathHeightCThisState[k],
4744 v->ViewportYStartC[k],
4745 &v->PrefillC[k],
4746 &v->MaxNumSwC[k]);
4747 } else {
4748 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4749 v->PTEBufferSizeInRequestsForChroma = 0;
4750 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4751 v->MetaRowBytesC = 0.0;
4752 v->DPTEBytesPerRowC = 0.0;
4753 v->PrefetchLinesC[i][j][k] = 0.0;
4754 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4755 }
4756 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4757 mode_lib,
4758 v->DCCEnable[k],
4759 v->Read256BlockHeightY[k],
4760 v->Read256BlockWidthY[k],
4761 v->SourcePixelFormat[k],
4762 v->SurfaceTiling[k],
4763 v->BytePerPixelY[k],
4764 v->SourceScan[k],
4765 v->SwathWidthYThisState[k],
4766 v->ViewportHeight[k],
4767 v->GPUVMEnable,
4768 v->HostVMEnable,
4769 v->HostVMMaxNonCachedPageTableLevels,
4770 v->GPUVMMinPageSize,
4771 v->HostVMMinPageSize,
4772 v->PTEBufferSizeInRequestsForLuma,
4773 v->PitchY[k],
4774 v->DCCMetaPitchY[k],
4775 &v->MacroTileWidthY[k],
4776 &v->MetaRowBytesY,
4777 &v->DPTEBytesPerRowY,
4778 &v->PTEBufferSizeNotExceededY[i][j][k],
4779 &v->dummyinteger7,
4780 &v->dpte_row_height[k],
4781 &v->dummyinteger29,
4782 &v->dummyinteger27,
4783 &v->dummyinteger24,
4784 &v->meta_row_height[k],
4785 &v->dummyinteger25,
4786 &v->dpte_group_bytes[k],
4787 &v->dummyinteger21,
4788 &v->dummyinteger22,
4789 &v->dummyinteger18,
4790 &v->dummyinteger5,
4791 &v->dummyinteger6);
4792 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4793 mode_lib,
4794 v->VRatio[k],
4795 v->vtaps[k],
4796 v->Interlace[k],
4797 v->ProgressiveToInterlaceUnitInOPP,
4798 v->SwathHeightYThisState[k],
4799 v->ViewportYStartY[k],
4800 &v->PrefillY[k],
4801 &v->MaxNumSwY[k]);
4802 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4803 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4804 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4805
4806 CalculateRowBandwidth(
4807 v->GPUVMEnable,
4808 v->SourcePixelFormat[k],
4809 v->VRatio[k],
4810 v->VRatioChroma[k],
4811 v->DCCEnable[k],
4812 v->HTotal[k] / v->PixelClock[k],
4813 v->MetaRowBytesY,
4814 v->MetaRowBytesC,
4815 v->meta_row_height[k],
4816 v->meta_row_height_chroma[k],
4817 v->DPTEBytesPerRowY,
4818 v->DPTEBytesPerRowC,
4819 v->dpte_row_height[k],
4820 v->dpte_row_height_chroma[k],
4821 &v->meta_row_bandwidth[i][j][k],
4822 &v->dpte_row_bandwidth[i][j][k]);
4823 }
4824 v->UrgLatency[i] = CalculateUrgentLatency(
4825 v->UrgentLatencyPixelDataOnly,
4826 v->UrgentLatencyPixelMixedWithVMData,
4827 v->UrgentLatencyVMDataOnly,
4828 v->DoUrgentLatencyAdjustment,
4829 v->UrgentLatencyAdjustmentFabricClockComponent,
4830 v->UrgentLatencyAdjustmentFabricClockReference,
4831 v->FabricClockPerState[i]);
4832
4833 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4834 CalculateUrgentBurstFactor(
4835 v->swath_width_luma_ub_this_state[k],
4836 v->swath_width_chroma_ub_this_state[k],
4837 v->SwathHeightYThisState[k],
4838 v->SwathHeightCThisState[k],
4839 v->HTotal[k] / v->PixelClock[k],
4840 v->UrgLatency[i],
4841 v->CursorBufferSize,
4842 v->CursorWidth[k][0],
4843 v->CursorBPP[k][0],
4844 v->VRatio[k],
4845 v->VRatioChroma[k],
4846 v->BytePerPixelInDETY[k],
4847 v->BytePerPixelInDETC[k],
4848 v->DETBufferSizeYThisState[k],
4849 v->DETBufferSizeCThisState[k],
4850 &v->UrgentBurstFactorCursor[k],
4851 &v->UrgentBurstFactorLuma[k],
4852 &v->UrgentBurstFactorChroma[k],
4853 &NotUrgentLatencyHiding[k]);
4854 }
4855
4856 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4857 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4858 if (NotUrgentLatencyHiding[k]) {
4859 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4860 }
4861 }
4862
4863 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4864 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4865 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4866 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4867 }
4868
4869 v->TotalVActivePixelBandwidth[i][j] = 0;
4870 v->TotalVActiveCursorBandwidth[i][j] = 0;
4871 v->TotalMetaRowBandwidth[i][j] = 0;
4872 v->TotalDPTERowBandwidth[i][j] = 0;
4873 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4874 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4875 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4876 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4877 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4878 }
4879 }
4880 }
4881
4882 //Calculate Return BW
4883 for (i = 0; i < v->soc.num_states; ++i) {
4884 for (j = 0; j <= 1; ++j) {
4885 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4886 if (v->BlendingAndTiming[k] == k) {
4887 if (v->WritebackEnable[k] == true) {
4888 v->WritebackDelayTime[k] = v->WritebackLatency
4889 + CalculateWriteBackDelay(
4890 v->WritebackPixelFormat[k],
4891 v->WritebackHRatio[k],
4892 v->WritebackVRatio[k],
4893 v->WritebackVTaps[k],
4894 v->WritebackDestinationWidth[k],
4895 v->WritebackDestinationHeight[k],
4896 v->WritebackSourceHeight[k],
4897 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4898 } else {
4899 v->WritebackDelayTime[k] = 0.0;
4900 }
4901 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4902 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4903 v->WritebackDelayTime[k] = dml_max(
4904 v->WritebackDelayTime[k],
4905 v->WritebackLatency
4906 + CalculateWriteBackDelay(
4907 v->WritebackPixelFormat[m],
4908 v->WritebackHRatio[m],
4909 v->WritebackVRatio[m],
4910 v->WritebackVTaps[m],
4911 v->WritebackDestinationWidth[m],
4912 v->WritebackDestinationHeight[m],
4913 v->WritebackSourceHeight[m],
4914 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4915 }
4916 }
4917 }
4918 }
4919 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4920 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4921 if (v->BlendingAndTiming[k] == m) {
4922 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4923 }
4924 }
4925 }
4926 v->MaxMaxVStartup[i][j] = 0;
4927 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4928 v->MaximumVStartup[i][j][k] =
4929 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
4930 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
4931 v->VTotal[k] - v->VActive[k]
4932 - dml_max(
4933 1.0,
4934 dml_ceil(
4935 1.0 * v->WritebackDelayTime[k]
4936 / (v->HTotal[k]
4937 / v->PixelClock[k]),
4938 1.0));
4939 if (v->MaximumVStartup[i][j][k] > 1023)
4940 v->MaximumVStartup[i][j][k] = 1023;
4941 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4942 }
4943 }
4944 }
4945
4946 ReorderingBytes = v->NumberOfChannels
4947 * dml_max3(
4948 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4949 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4950 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4951
4952 for (i = 0; i < v->soc.num_states; ++i) {
4953 for (j = 0; j <= 1; ++j) {
4954 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4955 }
4956 }
4957
4958 if (v->UseMinimumRequiredDCFCLK == true) {
4959 UseMinimumDCFCLK(
4960 mode_lib,
4961 v->MaxInterDCNTileRepeaters,
4962 MaxPrefetchMode,
4963 v->DRAMClockChangeLatency,
4964 v->SREnterPlusExitTime,
4965 v->ReturnBusWidth,
4966 v->RoundTripPingLatencyCycles,
4967 ReorderingBytes,
4968 v->PixelChunkSizeInKByte,
4969 v->MetaChunkSize,
4970 v->GPUVMEnable,
4971 v->GPUVMMaxPageTableLevels,
4972 v->HostVMEnable,
4973 v->NumberOfActivePlanes,
4974 v->HostVMMinPageSize,
4975 v->HostVMMaxNonCachedPageTableLevels,
4976 v->DynamicMetadataVMEnabled,
4977 v->ImmediateFlipRequirement[0],
4978 v->ProgressiveToInterlaceUnitInOPP,
4979 v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
4980 v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
4981 v->VTotal,
4982 v->VActive,
4983 v->DynamicMetadataTransmittedBytes,
4984 v->DynamicMetadataLinesBeforeActiveRequired,
4985 v->Interlace,
4986 v->RequiredDPPCLK,
4987 v->RequiredDISPCLK,
4988 v->UrgLatency,
4989 v->NoOfDPP,
4990 v->ProjectedDCFCLKDeepSleep,
4991 v->MaximumVStartup,
4992 v->TotalVActivePixelBandwidth,
4993 v->TotalVActiveCursorBandwidth,
4994 v->TotalMetaRowBandwidth,
4995 v->TotalDPTERowBandwidth,
4996 v->TotalNumberOfActiveDPP,
4997 v->TotalNumberOfDCCActiveDPP,
4998 v->dpte_group_bytes,
4999 v->PrefetchLinesY,
5000 v->PrefetchLinesC,
5001 v->swath_width_luma_ub_all_states,
5002 v->swath_width_chroma_ub_all_states,
5003 v->BytePerPixelY,
5004 v->BytePerPixelC,
5005 v->HTotal,
5006 v->PixelClock,
5007 v->PDEAndMetaPTEBytesPerFrame,
5008 v->DPTEBytesPerRow,
5009 v->MetaRowBytes,
5010 v->DynamicMetadataEnable,
5011 v->VActivePixelBandwidth,
5012 v->VActiveCursorBandwidth,
5013 v->ReadBandwidthLuma,
5014 v->ReadBandwidthChroma,
5015 v->DCFCLKPerState,
5016 v->DCFCLKState);
5017 }
5018
5019 for (i = 0; i < v->soc.num_states; ++i) {
5020 for (j = 0; j <= 1; ++j) {
5021 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5022 v->ReturnBusWidth * v->DCFCLKState[i][j],
5023 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5024 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5025 double PixelDataOnlyReturnBWPerState = dml_min(
5026 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5027 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5028 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5029 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5030 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5031
5032 if (v->HostVMEnable != true) {
5033 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5034 } else {
5035 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5036 }
5037 }
5038 }
5039
5040 //Re-ordering Buffer Support Check
5041 for (i = 0; i < v->soc.num_states; ++i) {
5042 for (j = 0; j <= 1; ++j) {
5043 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5044 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5045 v->ROBSupport[i][j] = true;
5046 } else {
5047 v->ROBSupport[i][j] = false;
5048 }
5049 }
5050 }
5051
5052 //Vertical Active BW support check
5053
5054 MaxTotalVActiveRDBandwidth = 0;
5055 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5056 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5057 }
5058
5059 for (i = 0; i < v->soc.num_states; ++i) {
5060 for (j = 0; j <= 1; ++j) {
5061 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5062 dml_min(
5063 v->ReturnBusWidth * v->DCFCLKState[i][j],
5064 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5065 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5066 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5067 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5068
5069 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5070 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5071 } else {
5072 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5073 }
5074 }
5075 }
5076
5077 v->UrgentLatency = CalculateUrgentLatency(
5078 v->UrgentLatencyPixelDataOnly,
5079 v->UrgentLatencyPixelMixedWithVMData,
5080 v->UrgentLatencyVMDataOnly,
5081 v->DoUrgentLatencyAdjustment,
5082 v->UrgentLatencyAdjustmentFabricClockComponent,
5083 v->UrgentLatencyAdjustmentFabricClockReference,
5084 v->FabricClock);
5085 //Prefetch Check
5086 for (i = 0; i < v->soc.num_states; ++i) {
5087 for (j = 0; j <= 1; ++j) {
5088 double VMDataOnlyReturnBWPerState;
5089 double HostVMInefficiencyFactor = 1;
5090 int NextPrefetchModeState = MinPrefetchMode;
5091 bool UnboundedRequestEnabledThisState = false;
5092 int CompressedBufferSizeInkByteThisState = 0;
5093 double dummy;
5094
5095 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5096
5097 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5098 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5099 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5100 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5101 }
5102
5103 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5104 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5105 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5106 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5107 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5108 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5109 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5110 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5111 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5112 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5113 }
5114
5115 VMDataOnlyReturnBWPerState = dml_min(
5116 dml_min(
5117 v->ReturnBusWidth * v->DCFCLKState[i][j],
5118 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5119 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5120 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5121 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5122 if (v->GPUVMEnable && v->HostVMEnable)
5123 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5124
5125 v->ExtraLatency = CalculateExtraLatency(
5126 v->RoundTripPingLatencyCycles,
5127 ReorderingBytes,
5128 v->DCFCLKState[i][j],
5129 v->TotalNumberOfActiveDPP[i][j],
5130 v->PixelChunkSizeInKByte,
5131 v->TotalNumberOfDCCActiveDPP[i][j],
5132 v->MetaChunkSize,
5133 v->ReturnBWPerState[i][j],
5134 v->GPUVMEnable,
5135 v->HostVMEnable,
5136 v->NumberOfActivePlanes,
5137 v->NoOfDPPThisState,
5138 v->dpte_group_bytes,
5139 HostVMInefficiencyFactor,
5140 v->HostVMMinPageSize,
5141 v->HostVMMaxNonCachedPageTableLevels);
5142
5143 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5144 do {
5145 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5146 v->MaxVStartup = v->NextMaxVStartup;
5147
5148 v->TWait = CalculateTWait(
5149 v->PrefetchModePerState[i][j],
5150 v->DRAMClockChangeLatency,
5151 v->UrgLatency[i],
5152 v->SREnterPlusExitTime);
5153
5154 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5155 Pipe myPipe;
5156
5157 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
5158 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
5159 myPipe.PixelClock = v->PixelClock[k];
5160 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
5161 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
5162 myPipe.ScalerEnabled = v->ScalerEnabled[k];
5163 myPipe.SourceScan = v->SourceScan[k];
5164 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
5165 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
5166 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
5167 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
5168 myPipe.InterlaceEnable = v->Interlace[k];
5169 myPipe.NumberOfCursors = v->NumberOfCursors[k];
5170 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
5171 myPipe.HTotal = v->HTotal[k];
5172 myPipe.DCCEnable = v->DCCEnable[k];
5173 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
5174 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
5175 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
5176 myPipe.BytePerPixelY = v->BytePerPixelY[k];
5177 myPipe.BytePerPixelC = v->BytePerPixelC[k];
5178 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
5179 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
5180 mode_lib,
5181 HostVMInefficiencyFactor,
5182 &myPipe,
5183 v->DSCDelayPerState[i][k],
5184 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
5185 v->DPPCLKDelaySCL,
5186 v->DPPCLKDelaySCLLBOnly,
5187 v->DPPCLKDelayCNVCCursor,
5188 v->DISPCLKDelaySubtotal,
5189 v->SwathWidthYThisState[k] / v->HRatio[k],
5190 v->OutputFormat[k],
5191 v->MaxInterDCNTileRepeaters,
5192 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
5193 v->MaximumVStartup[i][j][k],
5194 v->GPUVMMaxPageTableLevels,
5195 v->GPUVMEnable,
5196 v->HostVMEnable,
5197 v->HostVMMaxNonCachedPageTableLevels,
5198 v->HostVMMinPageSize,
5199 v->DynamicMetadataEnable[k],
5200 v->DynamicMetadataVMEnabled,
5201 v->DynamicMetadataLinesBeforeActiveRequired[k],
5202 v->DynamicMetadataTransmittedBytes[k],
5203 v->UrgLatency[i],
5204 v->ExtraLatency,
5205 v->TimeCalc,
5206 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5207 v->MetaRowBytes[i][j][k],
5208 v->DPTEBytesPerRow[i][j][k],
5209 v->PrefetchLinesY[i][j][k],
5210 v->SwathWidthYThisState[k],
5211 v->PrefillY[k],
5212 v->MaxNumSwY[k],
5213 v->PrefetchLinesC[i][j][k],
5214 v->SwathWidthCThisState[k],
5215 v->PrefillC[k],
5216 v->MaxNumSwC[k],
5217 v->swath_width_luma_ub_this_state[k],
5218 v->swath_width_chroma_ub_this_state[k],
5219 v->SwathHeightYThisState[k],
5220 v->SwathHeightCThisState[k],
5221 v->TWait,
5222 &v->DSTXAfterScaler[k],
5223 &v->DSTYAfterScaler[k],
5224 &v->LineTimesForPrefetch[k],
5225 &v->PrefetchBW[k],
5226 &v->LinesForMetaPTE[k],
5227 &v->LinesForMetaAndDPTERow[k],
5228 &v->VRatioPreY[i][j][k],
5229 &v->VRatioPreC[i][j][k],
5230 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
5231 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
5232 &v->NoTimeForDynamicMetadata[i][j][k],
5233 &v->Tno_bw[k],
5234 &v->prefetch_vmrow_bw[k],
5235 &v->dummy7[k],
5236 &v->dummy8[k],
5237 &v->dummy13[k],
5238 &v->VUpdateOffsetPix[k],
5239 &v->VUpdateWidthPix[k],
5240 &v->VReadyOffsetPix[k]);
5241 }
5242
5243 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5244 CalculateUrgentBurstFactor(
5245 v->swath_width_luma_ub_this_state[k],
5246 v->swath_width_chroma_ub_this_state[k],
5247 v->SwathHeightYThisState[k],
5248 v->SwathHeightCThisState[k],
5249 v->HTotal[k] / v->PixelClock[k],
5250 v->UrgentLatency,
5251 v->CursorBufferSize,
5252 v->CursorWidth[k][0],
5253 v->CursorBPP[k][0],
5254 v->VRatioPreY[i][j][k],
5255 v->VRatioPreC[i][j][k],
5256 v->BytePerPixelInDETY[k],
5257 v->BytePerPixelInDETC[k],
5258 v->DETBufferSizeYThisState[k],
5259 v->DETBufferSizeCThisState[k],
5260 &v->UrgentBurstFactorCursorPre[k],
5261 &v->UrgentBurstFactorLumaPre[k],
5262 &v->UrgentBurstFactorChroma[k],
5263 &v->NotUrgentLatencyHidingPre[k]);
5264 }
5265
5266 v->MaximumReadBandwidthWithPrefetch = 0.0;
5267 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5268 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5269 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5270
5271 v->MaximumReadBandwidthWithPrefetch =
5272 v->MaximumReadBandwidthWithPrefetch
5273 + dml_max3(
5274 v->VActivePixelBandwidth[i][j][k]
5275 + v->VActiveCursorBandwidth[i][j][k]
5276 + v->NoOfDPP[i][j][k]
5277 * (v->meta_row_bandwidth[i][j][k]
5278 + v->dpte_row_bandwidth[i][j][k]),
5279 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5280 v->NoOfDPP[i][j][k]
5281 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5282 * v->UrgentBurstFactorLumaPre[k]
5283 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5284 * v->UrgentBurstFactorChromaPre[k])
5285 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5286 }
5287
5288 v->NotEnoughUrgentLatencyHidingPre = false;
5289 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5290 if (v->NotUrgentLatencyHidingPre[k] == true) {
5291 v->NotEnoughUrgentLatencyHidingPre = true;
5292 }
5293 }
5294
5295 v->PrefetchSupported[i][j] = true;
5296 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5297 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5298 v->PrefetchSupported[i][j] = false;
5299 }
5300 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5301 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5302 || v->NoTimeForPrefetch[i][j][k] == true) {
5303 v->PrefetchSupported[i][j] = false;
5304 }
5305 }
5306
5307 v->DynamicMetadataSupported[i][j] = true;
5308 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5309 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5310 v->DynamicMetadataSupported[i][j] = false;
5311 }
5312 }
5313
5314 v->VRatioInPrefetchSupported[i][j] = true;
5315 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5316 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5317 v->VRatioInPrefetchSupported[i][j] = false;
5318 }
5319 }
5320 v->AnyLinesForVMOrRowTooLarge = false;
5321 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5322 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5323 v->AnyLinesForVMOrRowTooLarge = true;
5324 }
5325 }
5326
5327 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5328
5329 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5330 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5331 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5332 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5333 - dml_max(
5334 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5335 v->NoOfDPP[i][j][k]
5336 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5337 * v->UrgentBurstFactorLumaPre[k]
5338 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5339 * v->UrgentBurstFactorChromaPre[k])
5340 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5341 }
5342 v->TotImmediateFlipBytes = 0.0;
5343 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5344 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5345 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5346 + v->DPTEBytesPerRow[i][j][k];
5347 }
5348
5349 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5350 CalculateFlipSchedule(
5351 mode_lib,
5352 k,
5353 HostVMInefficiencyFactor,
5354 v->ExtraLatency,
5355 v->UrgLatency[i],
5356 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5357 v->MetaRowBytes[i][j][k],
5358 v->DPTEBytesPerRow[i][j][k]);
5359 }
5360 v->total_dcn_read_bw_with_flip = 0.0;
5361 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5362 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5363 + dml_max3(
5364 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5365 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5366 + v->VActiveCursorBandwidth[i][j][k],
5367 v->NoOfDPP[i][j][k]
5368 * (v->final_flip_bw[k]
5369 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5370 * v->UrgentBurstFactorLumaPre[k]
5371 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5372 * v->UrgentBurstFactorChromaPre[k])
5373 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5374 }
5375 v->ImmediateFlipSupportedForState[i][j] = true;
5376 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5377 v->ImmediateFlipSupportedForState[i][j] = false;
5378 }
5379 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5380 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5381 v->ImmediateFlipSupportedForState[i][j] = false;
5382 }
5383 }
5384 } else {
5385 v->ImmediateFlipSupportedForState[i][j] = false;
5386 }
5387
5388 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5389 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5390 NextPrefetchModeState = NextPrefetchModeState + 1;
5391 } else {
5392 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5393 }
5394 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5395 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5396 && ((v->HostVMEnable == false &&
5397 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5398 || v->ImmediateFlipSupportedForState[i][j] == true))
5399 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5400
5401 CalculateUnboundedRequestAndCompressedBufferSize(
5402 v->DETBufferSizeInKByte[0],
5403 v->ConfigReturnBufferSizeInKByte,
5404 v->UseUnboundedRequesting,
5405 v->TotalNumberOfActiveDPP[i][j],
5406 NoChroma,
5407 v->MaxNumDPP,
5408 v->CompressedBufferSegmentSizeInkByte,
5409 v->Output,
5410 &UnboundedRequestEnabledThisState,
5411 &CompressedBufferSizeInkByteThisState);
5412
5413 CalculateWatermarksAndDRAMSpeedChangeSupport(
5414 mode_lib,
5415 v->PrefetchModePerState[i][j],
5416 v->DCFCLKState[i][j],
5417 v->ReturnBWPerState[i][j],
5418 v->UrgLatency[i],
5419 v->ExtraLatency,
5420 v->SOCCLKPerState[i],
5421 v->ProjectedDCFCLKDeepSleep[i][j],
5422 v->DETBufferSizeYThisState,
5423 v->DETBufferSizeCThisState,
5424 v->SwathHeightYThisState,
5425 v->SwathHeightCThisState,
5426 v->SwathWidthYThisState,
5427 v->SwathWidthCThisState,
5428 v->NoOfDPPThisState,
5429 v->BytePerPixelInDETY,
5430 v->BytePerPixelInDETC,
5431 UnboundedRequestEnabledThisState,
5432 CompressedBufferSizeInkByteThisState,
5433 &v->DRAMClockChangeSupport[i][j],
5434 &dummy,
5435 &dummy,
5436 &dummy,
5437 &dummy);
5438 }
5439 }
5440
5441 /*PTE Buffer Size Check*/
5442 for (i = 0; i < v->soc.num_states; i++) {
5443 for (j = 0; j < 2; j++) {
5444 v->PTEBufferSizeNotExceeded[i][j] = true;
5445 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5446 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5447 v->PTEBufferSizeNotExceeded[i][j] = false;
5448 }
5449 }
5450 }
5451 }
5452
5453 /*Cursor Support Check*/
5454 v->CursorSupport = true;
5455 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5456 if (v->CursorWidth[k][0] > 0.0) {
5457 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5458 v->CursorSupport = false;
5459 }
5460 }
5461 }
5462
5463 /*Valid Pitch Check*/
5464 v->PitchSupport = true;
5465 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5466 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5467 if (v->DCCEnable[k] == true) {
5468 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5469 } else {
5470 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5471 }
5472 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5473 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5474 && v->SourcePixelFormat[k] != dm_mono_8) {
5475 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5476 if (v->DCCEnable[k] == true) {
5477 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5478 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5479 64.0 * v->Read256BlockWidthC[k]);
5480 } else {
5481 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5482 }
5483 } else {
5484 v->AlignedCPitch[k] = v->PitchC[k];
5485 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5486 }
5487 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5488 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5489 v->PitchSupport = false;
5490 }
5491 }
5492
5493 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5494 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5495 ViewportExceedsSurface = true;
5496 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5497 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5498 && v->SourcePixelFormat[k] != dm_rgbe) {
5499 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5500 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5501 ViewportExceedsSurface = true;
5502 }
5503 }
5504 }
5505 }
5506
5507 /*Mode Support, Voltage State and SOC Configuration*/
5508 for (i = v->soc.num_states - 1; i >= 0; i--) {
5509 for (j = 0; j < 2; j++) {
5510 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5511 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5512 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5513 && v->DTBCLKRequiredMoreThanSupported[i] == false
5514 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5515 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5516 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5517 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5518 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5519 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5520 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5521 && ((v->HostVMEnable == false
5522 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5523 || v->ImmediateFlipSupportedForState[i][j] == true)
5524 && FMTBufferExceeded == false) {
5525 v->ModeSupport[i][j] = true;
5526 } else {
5527 v->ModeSupport[i][j] = false;
5528 }
5529 }
5530 }
5531
5532 {
5533 unsigned int MaximumMPCCombine = 0;
5534 for (i = v->soc.num_states; i >= 0; i--) {
5535 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5536 v->VoltageLevel = i;
5537 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5538 if (v->ModeSupport[i][0] == true) {
5539 MaximumMPCCombine = 0;
5540 } else {
5541 MaximumMPCCombine = 1;
5542 }
5543 }
5544 }
5545 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5546 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5547 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5548 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5549 }
5550 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5551 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5552 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5553 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5554 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5555 v->maxMpcComb = MaximumMPCCombine;
5556 }
5557 }
5558
5559 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5560 struct display_mode_lib *mode_lib,
5561 unsigned int PrefetchMode,
5562 double DCFCLK,
5563 double ReturnBW,
5564 double UrgentLatency,
5565 double ExtraLatency,
5566 double SOCCLK,
5567 double DCFCLKDeepSleep,
5568 unsigned int DETBufferSizeY[],
5569 unsigned int DETBufferSizeC[],
5570 unsigned int SwathHeightY[],
5571 unsigned int SwathHeightC[],
5572 double SwathWidthY[],
5573 double SwathWidthC[],
5574 unsigned int DPPPerPlane[],
5575 double BytePerPixelDETY[],
5576 double BytePerPixelDETC[],
5577 bool UnboundedRequestEnabled,
5578 int unsigned CompressedBufferSizeInkByte,
5579 enum clock_change_support *DRAMClockChangeSupport,
5580 double *StutterExitWatermark,
5581 double *StutterEnterPlusExitWatermark,
5582 double *Z8StutterExitWatermark,
5583 double *Z8StutterEnterPlusExitWatermark)
5584 {
5585 struct vba_vars_st *v = &mode_lib->vba;
5586 double EffectiveLBLatencyHidingY;
5587 double EffectiveLBLatencyHidingC;
5588 double LinesInDETY[DC__NUM_DPP__MAX];
5589 double LinesInDETC;
5590 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5591 unsigned int LinesInDETCRoundedDownToSwath;
5592 double FullDETBufferingTimeY;
5593 double FullDETBufferingTimeC;
5594 double ActiveDRAMClockChangeLatencyMarginY;
5595 double ActiveDRAMClockChangeLatencyMarginC;
5596 double WritebackDRAMClockChangeLatencyMargin;
5597 double PlaneWithMinActiveDRAMClockChangeMargin;
5598 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5599 double WritebackDRAMClockChangeLatencyHiding;
5600 double TotalPixelBW = 0.0;
5601 int k, j;
5602
5603 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5604
5605 #ifdef __DML_VBA_DEBUG__
5606 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5607 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5608 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5609 #endif
5610
5611 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5612
5613 #ifdef __DML_VBA_DEBUG__
5614 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5615 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5616 #endif
5617
5618 v->TotalActiveWriteback = 0;
5619 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5620 if (v->WritebackEnable[k] == true) {
5621 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5622 }
5623 }
5624
5625 if (v->TotalActiveWriteback <= 1) {
5626 v->WritebackUrgentWatermark = v->WritebackLatency;
5627 } else {
5628 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5629 }
5630
5631 if (v->TotalActiveWriteback <= 1) {
5632 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5633 } else {
5634 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5635 }
5636
5637 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5638 TotalPixelBW = TotalPixelBW
5639 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5640 / (v->HTotal[k] / v->PixelClock[k]);
5641 }
5642
5643 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5644 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5645
5646 v->LBLatencyHidingSourceLinesY = dml_min(
5647 (double) v->MaxLineBufferLines,
5648 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5649
5650 v->LBLatencyHidingSourceLinesC = dml_min(
5651 (double) v->MaxLineBufferLines,
5652 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5653
5654 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5655
5656 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5657
5658 if (UnboundedRequestEnabled) {
5659 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5660 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5661 }
5662
5663 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5664 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5665 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5666 if (BytePerPixelDETC[k] > 0) {
5667 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5668 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5669 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5670 } else {
5671 LinesInDETC = 0;
5672 FullDETBufferingTimeC = 999999;
5673 }
5674
5675 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5676 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5677
5678 if (v->NumberOfActivePlanes > 1) {
5679 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5680 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5681 }
5682
5683 if (BytePerPixelDETC[k] > 0) {
5684 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5685 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5686
5687 if (v->NumberOfActivePlanes > 1) {
5688 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5689 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5690 }
5691 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5692 } else {
5693 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5694 }
5695
5696 if (v->WritebackEnable[k] == true) {
5697 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5698 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5699 if (v->WritebackPixelFormat[k] == dm_444_64) {
5700 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5701 }
5702 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5703 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5704 }
5705 }
5706
5707 v->MinActiveDRAMClockChangeMargin = 999999;
5708 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5709 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5710 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5711 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5712 if (v->BlendingAndTiming[k] == k) {
5713 PlaneWithMinActiveDRAMClockChangeMargin = k;
5714 } else {
5715 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5716 if (v->BlendingAndTiming[k] == j) {
5717 PlaneWithMinActiveDRAMClockChangeMargin = j;
5718 }
5719 }
5720 }
5721 }
5722 }
5723
5724 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5725
5726 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5727 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5728 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5729 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5730 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5731 }
5732 }
5733
5734 v->TotalNumberOfActiveOTG = 0;
5735
5736 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5737 if (v->BlendingAndTiming[k] == k) {
5738 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5739 }
5740 }
5741
5742 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5743 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5744 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5745 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5746 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5747 } else {
5748 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5749 }
5750
5751 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5752 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5753 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5754 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5755
5756 #ifdef __DML_VBA_DEBUG__
5757 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5758 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5759 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5760 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5761 #endif
5762 }
5763
5764 static void CalculateDCFCLKDeepSleep(
5765 struct display_mode_lib *mode_lib,
5766 unsigned int NumberOfActivePlanes,
5767 int BytePerPixelY[],
5768 int BytePerPixelC[],
5769 double VRatio[],
5770 double VRatioChroma[],
5771 double SwathWidthY[],
5772 double SwathWidthC[],
5773 unsigned int DPPPerPlane[],
5774 double HRatio[],
5775 double HRatioChroma[],
5776 double PixelClock[],
5777 double PSCL_THROUGHPUT[],
5778 double PSCL_THROUGHPUT_CHROMA[],
5779 double DPPCLK[],
5780 double ReadBandwidthLuma[],
5781 double ReadBandwidthChroma[],
5782 int ReturnBusWidth,
5783 double *DCFCLKDeepSleep)
5784 {
5785 struct vba_vars_st *v = &mode_lib->vba;
5786 double DisplayPipeLineDeliveryTimeLuma;
5787 double DisplayPipeLineDeliveryTimeChroma;
5788 double ReadBandwidth = 0.0;
5789 int k;
5790
5791 for (k = 0; k < NumberOfActivePlanes; ++k) {
5792
5793 if (VRatio[k] <= 1) {
5794 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5795 } else {
5796 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5797 }
5798 if (BytePerPixelC[k] == 0) {
5799 DisplayPipeLineDeliveryTimeChroma = 0;
5800 } else {
5801 if (VRatioChroma[k] <= 1) {
5802 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5803 } else {
5804 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5805 }
5806 }
5807
5808 if (BytePerPixelC[k] > 0) {
5809 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5810 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5811 } else {
5812 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5813 }
5814 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5815
5816 }
5817
5818 for (k = 0; k < NumberOfActivePlanes; ++k) {
5819 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5820 }
5821
5822 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5823
5824 for (k = 0; k < NumberOfActivePlanes; ++k) {
5825 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5826 }
5827 }
5828
/*
 * Turn a buffer size expressed in time into an urgent burst factor.
 *
 * When the buffer time does not exceed UrgentLatency the flag behind
 * NotEnoughUrgentLatencyHiding is set and 0 is returned; otherwise the result
 * is BufferSizeInTime / (BufferSizeInTime - UrgentLatency).
 */
static double UrgentBurstFactorFromBufferTime(
		double BufferSizeInTime,
		double UrgentLatency,
		bool *NotEnoughUrgentLatencyHiding)
{
	if (BufferSizeInTime - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = 1;
		return 0;
	}

	return BufferSizeInTime / (BufferSizeInTime - UrgentLatency);
}

/*
 * CalculateUrgentBurstFactor - urgent burst factors for cursor, luma and chroma.
 *
 * *NotEnoughUrgentLatencyHiding is cleared on entry and set as soon as one of
 * the evaluated buffers holds no more time than UrgentLatency; the matching
 * factor is then 0.  With VRatio <= 0 an evaluated factor is simply 1.
 *
 * *UrgentBurstFactorCursor is written only when CursorWidth > 0 and
 * *UrgentBurstFactorChroma only when BytePerPixelInDETC > 0; otherwise they
 * are left untouched.
 *
 * NOTE(review): the chroma path tests and divides by VRatio, and the VRatioC
 * parameter is never read.  Kept exactly as found - confirm with the HW
 * formula owners whether VRatioC was intended there.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* Cursor lines that fit in the cursor buffer, rounded down to a power of two. */
		unsigned int LinesInCursorBuffer =
				1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);

		if (VRatio > 0) {
			*UrgentBurstFactorCursor = UrgentBurstFactorFromBufferTime(
					LinesInCursorBuffer * LineTime / VRatio,
					UrgentLatency,
					NotEnoughUrgentLatencyHiding);
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count towards the buffered time. */
		*UrgentBurstFactorLuma = UrgentBurstFactorFromBufferTime(
				dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio,
				UrgentLatency,
				NotEnoughUrgentLatencyHiding);
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		double LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;

		if (VRatio > 0) {
			*UrgentBurstFactorChroma = UrgentBurstFactorFromBufferTime(
					dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio,
					UrgentLatency,
					NotEnoughUrgentLatencyHiding);
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5902
5903 static void CalculatePixelDeliveryTimes(
5904 unsigned int NumberOfActivePlanes,
5905 double VRatio[],
5906 double VRatioChroma[],
5907 double VRatioPrefetchY[],
5908 double VRatioPrefetchC[],
5909 unsigned int swath_width_luma_ub[],
5910 unsigned int swath_width_chroma_ub[],
5911 unsigned int DPPPerPlane[],
5912 double HRatio[],
5913 double HRatioChroma[],
5914 double PixelClock[],
5915 double PSCL_THROUGHPUT[],
5916 double PSCL_THROUGHPUT_CHROMA[],
5917 double DPPCLK[],
5918 int BytePerPixelC[],
5919 enum scan_direction_class SourceScan[],
5920 unsigned int NumberOfCursors[],
5921 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5922 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5923 unsigned int BlockWidth256BytesY[],
5924 unsigned int BlockHeight256BytesY[],
5925 unsigned int BlockWidth256BytesC[],
5926 unsigned int BlockHeight256BytesC[],
5927 double DisplayPipeLineDeliveryTimeLuma[],
5928 double DisplayPipeLineDeliveryTimeChroma[],
5929 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5930 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5931 double DisplayPipeRequestDeliveryTimeLuma[],
5932 double DisplayPipeRequestDeliveryTimeChroma[],
5933 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5934 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5935 double CursorRequestDeliveryTime[],
5936 double CursorRequestDeliveryTimePrefetch[])
5937 {
5938 double req_per_swath_ub;
5939 int k;
5940
5941 for (k = 0; k < NumberOfActivePlanes; ++k) {
5942 if (VRatio[k] <= 1) {
5943 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5944 } else {
5945 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5946 }
5947
5948 if (BytePerPixelC[k] == 0) {
5949 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5950 } else {
5951 if (VRatioChroma[k] <= 1) {
5952 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5953 } else {
5954 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5955 }
5956 }
5957
5958 if (VRatioPrefetchY[k] <= 1) {
5959 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5960 } else {
5961 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5962 }
5963
5964 if (BytePerPixelC[k] == 0) {
5965 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5966 } else {
5967 if (VRatioPrefetchC[k] <= 1) {
5968 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5969 } else {
5970 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5971 }
5972 }
5973 }
5974
5975 for (k = 0; k < NumberOfActivePlanes; ++k) {
5976 if (SourceScan[k] != dm_vert) {
5977 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5978 } else {
5979 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5980 }
5981 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5982 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5983 if (BytePerPixelC[k] == 0) {
5984 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5985 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5986 } else {
5987 if (SourceScan[k] != dm_vert) {
5988 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5989 } else {
5990 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5991 }
5992 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5993 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5994 }
5995 #ifdef __DML_VBA_DEBUG__
5996 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5997 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5998 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5999 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6000 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6001 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6002 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6003 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6004 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6005 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6006 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6007 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6008 #endif
6009 }
6010
6011 for (k = 0; k < NumberOfActivePlanes; ++k) {
6012 int cursor_req_per_width;
6013 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6014 if (NumberOfCursors[k] > 0) {
6015 if (VRatio[k] <= 1) {
6016 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6017 } else {
6018 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6019 }
6020 if (VRatioPrefetchY[k] <= 1) {
6021 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6022 } else {
6023 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6024 }
6025 } else {
6026 CursorRequestDeliveryTime[k] = 0;
6027 CursorRequestDeliveryTimePrefetch[k] = 0;
6028 }
6029 #ifdef __DML_VBA_DEBUG__
6030 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6031 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6032 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6033 #endif
6034 }
6035 }
6036
6037 static void CalculateMetaAndPTETimes(
6038 int NumberOfActivePlanes,
6039 bool GPUVMEnable,
6040 int MetaChunkSize,
6041 int MinMetaChunkSizeBytes,
6042 int HTotal[],
6043 double VRatio[],
6044 double VRatioChroma[],
6045 double DestinationLinesToRequestRowInVBlank[],
6046 double DestinationLinesToRequestRowInImmediateFlip[],
6047 bool DCCEnable[],
6048 double PixelClock[],
6049 int BytePerPixelY[],
6050 int BytePerPixelC[],
6051 enum scan_direction_class SourceScan[],
6052 int dpte_row_height[],
6053 int dpte_row_height_chroma[],
6054 int meta_row_width[],
6055 int meta_row_width_chroma[],
6056 int meta_row_height[],
6057 int meta_row_height_chroma[],
6058 int meta_req_width[],
6059 int meta_req_width_chroma[],
6060 int meta_req_height[],
6061 int meta_req_height_chroma[],
6062 int dpte_group_bytes[],
6063 int PTERequestSizeY[],
6064 int PTERequestSizeC[],
6065 int PixelPTEReqWidthY[],
6066 int PixelPTEReqHeightY[],
6067 int PixelPTEReqWidthC[],
6068 int PixelPTEReqHeightC[],
6069 int dpte_row_width_luma_ub[],
6070 int dpte_row_width_chroma_ub[],
6071 double DST_Y_PER_PTE_ROW_NOM_L[],
6072 double DST_Y_PER_PTE_ROW_NOM_C[],
6073 double DST_Y_PER_META_ROW_NOM_L[],
6074 double DST_Y_PER_META_ROW_NOM_C[],
6075 double TimePerMetaChunkNominal[],
6076 double TimePerChromaMetaChunkNominal[],
6077 double TimePerMetaChunkVBlank[],
6078 double TimePerChromaMetaChunkVBlank[],
6079 double TimePerMetaChunkFlip[],
6080 double TimePerChromaMetaChunkFlip[],
6081 double time_per_pte_group_nom_luma[],
6082 double time_per_pte_group_vblank_luma[],
6083 double time_per_pte_group_flip_luma[],
6084 double time_per_pte_group_nom_chroma[],
6085 double time_per_pte_group_vblank_chroma[],
6086 double time_per_pte_group_flip_chroma[])
6087 {
6088 unsigned int meta_chunk_width;
6089 unsigned int min_meta_chunk_width;
6090 unsigned int meta_chunk_per_row_int;
6091 unsigned int meta_row_remainder;
6092 unsigned int meta_chunk_threshold;
6093 unsigned int meta_chunks_per_row_ub;
6094 unsigned int meta_chunk_width_chroma;
6095 unsigned int min_meta_chunk_width_chroma;
6096 unsigned int meta_chunk_per_row_int_chroma;
6097 unsigned int meta_row_remainder_chroma;
6098 unsigned int meta_chunk_threshold_chroma;
6099 unsigned int meta_chunks_per_row_ub_chroma;
6100 unsigned int dpte_group_width_luma;
6101 unsigned int dpte_groups_per_row_luma_ub;
6102 unsigned int dpte_group_width_chroma;
6103 unsigned int dpte_groups_per_row_chroma_ub;
6104 int k;
6105
6106 for (k = 0; k < NumberOfActivePlanes; ++k) {
6107 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6108 if (BytePerPixelC[k] == 0) {
6109 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6110 } else {
6111 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6112 }
6113 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6114 if (BytePerPixelC[k] == 0) {
6115 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6116 } else {
6117 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6118 }
6119 }
6120
6121 for (k = 0; k < NumberOfActivePlanes; ++k) {
6122 if (DCCEnable[k] == true) {
6123 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6124 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6125 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6126 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6127 if (SourceScan[k] != dm_vert) {
6128 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6129 } else {
6130 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6131 }
6132 if (meta_row_remainder <= meta_chunk_threshold) {
6133 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6134 } else {
6135 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6136 }
6137 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6138 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6139 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6140 if (BytePerPixelC[k] == 0) {
6141 TimePerChromaMetaChunkNominal[k] = 0;
6142 TimePerChromaMetaChunkVBlank[k] = 0;
6143 TimePerChromaMetaChunkFlip[k] = 0;
6144 } else {
6145 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6146 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6147 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6148 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6149 if (SourceScan[k] != dm_vert) {
6150 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6151 } else {
6152 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6153 }
6154 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6155 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6156 } else {
6157 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6158 }
6159 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6160 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6161 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6162 }
6163 } else {
6164 TimePerMetaChunkNominal[k] = 0;
6165 TimePerMetaChunkVBlank[k] = 0;
6166 TimePerMetaChunkFlip[k] = 0;
6167 TimePerChromaMetaChunkNominal[k] = 0;
6168 TimePerChromaMetaChunkVBlank[k] = 0;
6169 TimePerChromaMetaChunkFlip[k] = 0;
6170 }
6171 }
6172
6173 for (k = 0; k < NumberOfActivePlanes; ++k) {
6174 if (GPUVMEnable == true) {
6175 if (SourceScan[k] != dm_vert) {
6176 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6177 } else {
6178 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6179 }
6180 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6181 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6182 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6183 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6184 if (BytePerPixelC[k] == 0) {
6185 time_per_pte_group_nom_chroma[k] = 0;
6186 time_per_pte_group_vblank_chroma[k] = 0;
6187 time_per_pte_group_flip_chroma[k] = 0;
6188 } else {
6189 if (SourceScan[k] != dm_vert) {
6190 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6191 } else {
6192 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6193 }
6194 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6195 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6196 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6197 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6198 }
6199 } else {
6200 time_per_pte_group_nom_luma[k] = 0;
6201 time_per_pte_group_vblank_luma[k] = 0;
6202 time_per_pte_group_flip_luma[k] = 0;
6203 time_per_pte_group_nom_chroma[k] = 0;
6204 time_per_pte_group_vblank_chroma[k] = 0;
6205 time_per_pte_group_flip_chroma[k] = 0;
6206 }
6207 }
6208 }
6209
/*
 * CalculateVMGroupAndRequestTimes - time per VM group and per VM request.
 *
 * For each plane the four outputs are 0 unless GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1.  Otherwise the VM lines
 * available in vblank / immediate flip (times HTotal / PixelClock) are divided
 * by the number of groups, respectively 64-byte requests, taken from:
 *   - the dpde0 bytes per frame only, when DCC is off;
 *   - the meta PTE bytes per frame only, when DCC is on with exactly one page
 *     table level;
 *   - both, when DCC is on with any other level count; the group count then
 *     gets an extra 1 (luma only) or 2 (luma + chroma).
 * Chroma contributions are included only when BytePerPixelC[k] > 0.  All four
 * results are halved when GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not read.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		bool has_chroma;
		int groups_per_stage;
		int reqs_per_stage;

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;

		if (!DCCEnable[k]) {
			/* Page directory entries only. */
			if (has_chroma) {
				groups_per_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups_per_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			if (has_chroma) {
				groups_per_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups_per_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* Page directory entries and meta PTEs together. */
			if (has_chroma) {
				groups_per_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups_per_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / groups_per_stage;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / groups_per_stage;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / reqs_per_stage;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / reqs_per_stage;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
6309
6310 static void CalculateStutterEfficiency(
6311 struct display_mode_lib *mode_lib,
6312 int CompressedBufferSizeInkByte,
6313 bool UnboundedRequestEnabled,
6314 int ConfigReturnBufferSizeInKByte,
6315 int MetaFIFOSizeInKEntries,
6316 int ZeroSizeBufferEntries,
6317 int NumberOfActivePlanes,
6318 int ROBBufferSizeInKByte,
6319 double TotalDataReadBandwidth,
6320 double DCFCLK,
6321 double ReturnBW,
6322 double COMPBUF_RESERVED_SPACE_64B,
6323 double COMPBUF_RESERVED_SPACE_ZS,
6324 double SRExitTime,
6325 double SRExitZ8Time,
6326 bool SynchronizedVBlank,
6327 double Z8StutterEnterPlusExitWatermark,
6328 double StutterEnterPlusExitWatermark,
6329 bool ProgressiveToInterlaceUnitInOPP,
6330 bool Interlace[],
6331 double MinTTUVBlank[],
6332 int DPPPerPlane[],
6333 unsigned int DETBufferSizeY[],
6334 int BytePerPixelY[],
6335 double BytePerPixelDETY[],
6336 double SwathWidthY[],
6337 int SwathHeightY[],
6338 int SwathHeightC[],
6339 double NetDCCRateLuma[],
6340 double NetDCCRateChroma[],
6341 double DCCFractionOfZeroSizeRequestsLuma[],
6342 double DCCFractionOfZeroSizeRequestsChroma[],
6343 int HTotal[],
6344 int VTotal[],
6345 double PixelClock[],
6346 double VRatio[],
6347 enum scan_direction_class SourceScan[],
6348 int BlockHeight256BytesY[],
6349 int BlockWidth256BytesY[],
6350 int BlockHeight256BytesC[],
6351 int BlockWidth256BytesC[],
6352 int DCCYMaxUncompressedBlock[],
6353 int DCCCMaxUncompressedBlock[],
6354 int VActive[],
6355 bool DCCEnable[],
6356 bool WritebackEnable[],
6357 double ReadBandwidthPlaneLuma[],
6358 double ReadBandwidthPlaneChroma[],
6359 double meta_row_bw[],
6360 double dpte_row_bw[],
6361 double *StutterEfficiencyNotIncludingVBlank,
6362 double *StutterEfficiency,
6363 int *NumberOfStutterBurstsPerFrame,
6364 double *Z8StutterEfficiencyNotIncludingVBlank,
6365 double *Z8StutterEfficiency,
6366 int *Z8NumberOfStutterBurstsPerFrame,
6367 double *StutterPeriod)
6368 {
6369 struct vba_vars_st *v = &mode_lib->vba;
6370
6371 double DETBufferingTimeY;
6372 double SwathWidthYCriticalPlane = 0;
6373 double VActiveTimeCriticalPlane = 0;
6374 double FrameTimeCriticalPlane = 0;
6375 int BytePerPixelYCriticalPlane = 0;
6376 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6377 double MinTTUVBlankCriticalPlane = 0;
6378 double TotalCompressedReadBandwidth;
6379 double TotalRowReadBandwidth;
6380 double AverageDCCCompressionRate;
6381 double EffectiveCompressedBufferSize;
6382 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6383 double StutterBurstTime;
6384 int TotalActiveWriteback;
6385 double LinesInDETY;
6386 double LinesInDETYRoundedDownToSwath;
6387 double MaximumEffectiveCompressionLuma;
6388 double MaximumEffectiveCompressionChroma;
6389 double TotalZeroSizeRequestReadBandwidth;
6390 double TotalZeroSizeCompressedReadBandwidth;
6391 double AverageDCCZeroSizeFraction;
6392 double AverageZeroSizeCompressionRate;
6393 int TotalNumberOfActiveOTG = 0;
6394 double LastStutterPeriod = 0.0;
6395 double LastZ8StutterPeriod = 0.0;
6396 int k;
6397
6398 TotalZeroSizeRequestReadBandwidth = 0;
6399 TotalZeroSizeCompressedReadBandwidth = 0;
6400 TotalRowReadBandwidth = 0;
6401 TotalCompressedReadBandwidth = 0;
6402
6403 for (k = 0; k < NumberOfActivePlanes; ++k) {
6404 if (DCCEnable[k] == true) {
6405 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6406 || DCCYMaxUncompressedBlock[k] < 256) {
6407 MaximumEffectiveCompressionLuma = 2;
6408 } else {
6409 MaximumEffectiveCompressionLuma = 4;
6410 }
6411 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6412 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6413 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6414 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6415 if (ReadBandwidthPlaneChroma[k] > 0) {
6416 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6417 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6418 MaximumEffectiveCompressionChroma = 2;
6419 } else {
6420 MaximumEffectiveCompressionChroma = 4;
6421 }
6422 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6423 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6424 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6425 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6426 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6427 }
6428 } else {
6429 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6430 }
6431 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6432 }
6433
6434 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6435 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6436
6437 #ifdef __DML_VBA_DEBUG__
6438 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6439 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6440 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6441 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6442 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6443 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6444 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6445 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6446 #endif
6447
6448 if (AverageDCCZeroSizeFraction == 1) {
6449 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6450 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6451 } else if (AverageDCCZeroSizeFraction > 0) {
6452 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6453 EffectiveCompressedBufferSize = dml_min(
6454 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6455 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6456 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6457 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6458 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6459 dml_print(
6460 "DML::%s: min 2 = %f\n",
6461 __func__,
6462 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6463 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6464 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6465 } else {
6466 EffectiveCompressedBufferSize = dml_min(
6467 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6468 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6469 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6470 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6471 }
6472
6473 #ifdef __DML_VBA_DEBUG__
6474 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6475 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6476 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6477 #endif
6478
6479 *StutterPeriod = 0;
6480 for (k = 0; k < NumberOfActivePlanes; ++k) {
6481 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6482 / BytePerPixelDETY[k] / SwathWidthY[k];
6483 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6484 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6485 #ifdef __DML_VBA_DEBUG__
6486 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6487 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6488 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6489 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6490 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6491 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6492 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6493 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6494 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6495 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6496 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6497 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6498 #endif
6499
6500 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6501 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6502
6503 *StutterPeriod = DETBufferingTimeY;
6504 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6505 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6506 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6507 SwathWidthYCriticalPlane = SwathWidthY[k];
6508 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6509 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6510
6511 #ifdef __DML_VBA_DEBUG__
6512 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6513 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6514 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6515 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6516 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6517 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6518 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6519 #endif
6520 }
6521 }
6522
6523 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6524 #ifdef __DML_VBA_DEBUG__
6525 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6526 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6527 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6528 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6529 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6530 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6531 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6532 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6533 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6534 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6535 #endif
6536
6537 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6538 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6539 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6540 #ifdef __DML_VBA_DEBUG__
6541 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6542 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6543 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6544 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6545 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6546 #endif
6547 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6548
6549 dml_print(
6550 "DML::%s: Time to finish residue swath=%f\n",
6551 __func__,
6552 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6553
6554 TotalActiveWriteback = 0;
6555 for (k = 0; k < NumberOfActivePlanes; ++k) {
6556 if (WritebackEnable[k]) {
6557 TotalActiveWriteback = TotalActiveWriteback + 1;
6558 }
6559 }
6560
6561 if (TotalActiveWriteback == 0) {
6562 #ifdef __DML_VBA_DEBUG__
6563 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6564 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6565 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6566 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6567 #endif
6568 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6569 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6570 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6571 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6572 } else {
6573 *StutterEfficiencyNotIncludingVBlank = 0.;
6574 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6575 *NumberOfStutterBurstsPerFrame = 0;
6576 *Z8NumberOfStutterBurstsPerFrame = 0;
6577 }
6578 #ifdef __DML_VBA_DEBUG__
6579 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6580 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6581 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6582 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6583 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6584 #endif
6585
6586 for (k = 0; k < NumberOfActivePlanes; ++k) {
6587 if (v->BlendingAndTiming[k] == k) {
6588 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6589 }
6590 }
6591
6592 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6593 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6594
6595 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6596 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6597 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6598 } else {
6599 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6600 }
6601 } else {
6602 *StutterEfficiency = 0;
6603 }
6604
6605 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6606 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6607 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6608 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6609 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6610 } else {
6611 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6612 }
6613 } else {
6614 *Z8StutterEfficiency = 0.;
6615 }
6616
6617 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6618 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6619 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6620 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6621 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6622 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6623 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6624 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6625 }
6626
6627 static void CalculateSwathAndDETConfiguration(
6628 bool ForceSingleDPP,
6629 int NumberOfActivePlanes,
6630 unsigned int DETBufferSizeInKByte,
6631 double MaximumSwathWidthLuma[],
6632 double MaximumSwathWidthChroma[],
6633 enum scan_direction_class SourceScan[],
6634 enum source_format_class SourcePixelFormat[],
6635 enum dm_swizzle_mode SurfaceTiling[],
6636 int ViewportWidth[],
6637 int ViewportHeight[],
6638 int SurfaceWidthY[],
6639 int SurfaceWidthC[],
6640 int SurfaceHeightY[],
6641 int SurfaceHeightC[],
6642 int Read256BytesBlockHeightY[],
6643 int Read256BytesBlockHeightC[],
6644 int Read256BytesBlockWidthY[],
6645 int Read256BytesBlockWidthC[],
6646 enum odm_combine_mode ODMCombineEnabled[],
6647 int BlendingAndTiming[],
6648 int BytePerPixY[],
6649 int BytePerPixC[],
6650 double BytePerPixDETY[],
6651 double BytePerPixDETC[],
6652 int HActive[],
6653 double HRatio[],
6654 double HRatioChroma[],
6655 int DPPPerPlane[],
6656 int swath_width_luma_ub[],
6657 int swath_width_chroma_ub[],
6658 double SwathWidth[],
6659 double SwathWidthChroma[],
6660 int SwathHeightY[],
6661 int SwathHeightC[],
6662 unsigned int DETBufferSizeY[],
6663 unsigned int DETBufferSizeC[],
6664 bool ViewportSizeSupportPerPlane[],
6665 bool *ViewportSizeSupport)
6666 {
6667 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6668 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6669 int MinimumSwathHeightY;
6670 int MinimumSwathHeightC;
6671 int RoundedUpMaxSwathSizeBytesY;
6672 int RoundedUpMaxSwathSizeBytesC;
6673 int RoundedUpMinSwathSizeBytesY;
6674 int RoundedUpMinSwathSizeBytesC;
6675 int RoundedUpSwathSizeBytesY;
6676 int RoundedUpSwathSizeBytesC;
6677 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6678 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6679 int k;
6680
6681 CalculateSwathWidth(
6682 ForceSingleDPP,
6683 NumberOfActivePlanes,
6684 SourcePixelFormat,
6685 SourceScan,
6686 ViewportWidth,
6687 ViewportHeight,
6688 SurfaceWidthY,
6689 SurfaceWidthC,
6690 SurfaceHeightY,
6691 SurfaceHeightC,
6692 ODMCombineEnabled,
6693 BytePerPixY,
6694 BytePerPixC,
6695 Read256BytesBlockHeightY,
6696 Read256BytesBlockHeightC,
6697 Read256BytesBlockWidthY,
6698 Read256BytesBlockWidthC,
6699 BlendingAndTiming,
6700 HActive,
6701 HRatio,
6702 DPPPerPlane,
6703 SwathWidthSingleDPP,
6704 SwathWidthSingleDPPChroma,
6705 SwathWidth,
6706 SwathWidthChroma,
6707 MaximumSwathHeightY,
6708 MaximumSwathHeightC,
6709 swath_width_luma_ub,
6710 swath_width_chroma_ub);
6711
6712 *ViewportSizeSupport = true;
6713 for (k = 0; k < NumberOfActivePlanes; ++k) {
6714 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6715 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6716 if (SurfaceTiling[k] == dm_sw_linear
6717 || (SourcePixelFormat[k] == dm_444_64
6718 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6719 && SourceScan[k] != dm_vert)) {
6720 MinimumSwathHeightY = MaximumSwathHeightY[k];
6721 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6722 MinimumSwathHeightY = MaximumSwathHeightY[k];
6723 } else {
6724 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6725 }
6726 MinimumSwathHeightC = MaximumSwathHeightC[k];
6727 } else {
6728 if (SurfaceTiling[k] == dm_sw_linear) {
6729 MinimumSwathHeightY = MaximumSwathHeightY[k];
6730 MinimumSwathHeightC = MaximumSwathHeightC[k];
6731 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6732 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6733 MinimumSwathHeightC = MaximumSwathHeightC[k];
6734 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6735 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6736 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6737 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6738 MinimumSwathHeightY = MaximumSwathHeightY[k];
6739 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6740 } else {
6741 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6742 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6743 }
6744 }
6745
6746 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6747 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6748 if (SourcePixelFormat[k] == dm_420_10) {
6749 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6750 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6751 }
6752 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6753 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6754 if (SourcePixelFormat[k] == dm_420_10) {
6755 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6756 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6757 }
6758
6759 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6760 SwathHeightY[k] = MaximumSwathHeightY[k];
6761 SwathHeightC[k] = MaximumSwathHeightC[k];
6762 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6763 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6764 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6765 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6766 SwathHeightY[k] = MinimumSwathHeightY;
6767 SwathHeightC[k] = MaximumSwathHeightC[k];
6768 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6769 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6770 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6771 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6772 SwathHeightY[k] = MaximumSwathHeightY[k];
6773 SwathHeightC[k] = MinimumSwathHeightC;
6774 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6775 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6776 } else {
6777 SwathHeightY[k] = MinimumSwathHeightY;
6778 SwathHeightC[k] = MinimumSwathHeightC;
6779 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6780 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6781 }
6782 {
6783 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6784 if (SwathHeightC[k] == 0) {
6785 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6786 DETBufferSizeC[k] = 0;
6787 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6788 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6789 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6790 } else {
6791 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6792 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6793 }
6794
6795 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6796 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6797 *ViewportSizeSupport = false;
6798 ViewportSizeSupportPerPlane[k] = false;
6799 } else {
6800 ViewportSizeSupportPerPlane[k] = true;
6801 }
6802 }
6803 }
6804 }
6805
6806 static void CalculateSwathWidth(
6807 bool ForceSingleDPP,
6808 int NumberOfActivePlanes,
6809 enum source_format_class SourcePixelFormat[],
6810 enum scan_direction_class SourceScan[],
6811 int ViewportWidth[],
6812 int ViewportHeight[],
6813 int SurfaceWidthY[],
6814 int SurfaceWidthC[],
6815 int SurfaceHeightY[],
6816 int SurfaceHeightC[],
6817 enum odm_combine_mode ODMCombineEnabled[],
6818 int BytePerPixY[],
6819 int BytePerPixC[],
6820 int Read256BytesBlockHeightY[],
6821 int Read256BytesBlockHeightC[],
6822 int Read256BytesBlockWidthY[],
6823 int Read256BytesBlockWidthC[],
6824 int BlendingAndTiming[],
6825 int HActive[],
6826 double HRatio[],
6827 int DPPPerPlane[],
6828 double SwathWidthSingleDPPY[],
6829 double SwathWidthSingleDPPC[],
6830 double SwathWidthY[],
6831 double SwathWidthC[],
6832 int MaximumSwathHeightY[],
6833 int MaximumSwathHeightC[],
6834 int swath_width_luma_ub[],
6835 int swath_width_chroma_ub[])
6836 {
6837 enum odm_combine_mode MainPlaneODMCombine;
6838 int j, k;
6839
6840 #ifdef __DML_VBA_DEBUG__
6841 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6842 #endif
6843
6844 for (k = 0; k < NumberOfActivePlanes; ++k) {
6845 if (SourceScan[k] != dm_vert) {
6846 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6847 } else {
6848 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6849 }
6850
6851 #ifdef __DML_VBA_DEBUG__
6852 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6853 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6854 #endif
6855
6856 MainPlaneODMCombine = ODMCombineEnabled[k];
6857 for (j = 0; j < NumberOfActivePlanes; ++j) {
6858 if (BlendingAndTiming[k] == j) {
6859 MainPlaneODMCombine = ODMCombineEnabled[j];
6860 }
6861 }
6862
6863 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6864 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6865 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6866 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6867 } else if (DPPPerPlane[k] == 2) {
6868 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6869 } else {
6870 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6871 }
6872
6873 #ifdef __DML_VBA_DEBUG__
6874 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6875 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6876 #endif
6877
6878 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6879 SwathWidthC[k] = SwathWidthY[k] / 2;
6880 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6881 } else {
6882 SwathWidthC[k] = SwathWidthY[k];
6883 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6884 }
6885
6886 if (ForceSingleDPP == true) {
6887 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6888 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6889 }
6890 {
6891 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6892 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6893 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6894 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6895
6896 #ifdef __DML_VBA_DEBUG__
6897 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6898 #endif
6899
6900 if (SourceScan[k] != dm_vert) {
6901 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6902 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6903 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6904 if (BytePerPixC[k] > 0) {
6905 swath_width_chroma_ub[k] = dml_min(
6906 surface_width_ub_c,
6907 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6908 } else {
6909 swath_width_chroma_ub[k] = 0;
6910 }
6911 } else {
6912 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6913 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6914 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6915 if (BytePerPixC[k] > 0) {
6916 swath_width_chroma_ub[k] = dml_min(
6917 surface_height_ub_c,
6918 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6919 } else {
6920 swath_width_chroma_ub[k] = 0;
6921 }
6922 }
6923 }
6924 }
6925 }
6926
6927 static double CalculateExtraLatency(
6928 int RoundTripPingLatencyCycles,
6929 int ReorderingBytes,
6930 double DCFCLK,
6931 int TotalNumberOfActiveDPP,
6932 int PixelChunkSizeInKByte,
6933 int TotalNumberOfDCCActiveDPP,
6934 int MetaChunkSize,
6935 double ReturnBW,
6936 bool GPUVMEnable,
6937 bool HostVMEnable,
6938 int NumberOfActivePlanes,
6939 int NumberOfDPP[],
6940 int dpte_group_bytes[],
6941 double HostVMInefficiencyFactor,
6942 double HostVMMinPageSize,
6943 int HostVMMaxNonCachedPageTableLevels)
6944 {
6945 double ExtraLatencyBytes;
6946 double ExtraLatency;
6947
6948 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6949 ReorderingBytes,
6950 TotalNumberOfActiveDPP,
6951 PixelChunkSizeInKByte,
6952 TotalNumberOfDCCActiveDPP,
6953 MetaChunkSize,
6954 GPUVMEnable,
6955 HostVMEnable,
6956 NumberOfActivePlanes,
6957 NumberOfDPP,
6958 dpte_group_bytes,
6959 HostVMInefficiencyFactor,
6960 HostVMMinPageSize,
6961 HostVMMaxNonCachedPageTableLevels);
6962
6963 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6964
6965 #ifdef __DML_VBA_DEBUG__
6966 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6967 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6968 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6969 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6970 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6971 #endif
6972
6973 return ExtraLatency;
6974 }
6975
/*
 * CalculateExtraLatencyBytes - byte count used for the extra-latency term.
 *
 * The base amount is ReorderingBytes plus 1024 bytes for every KByte of
 * TotalNumberOfActiveDPP * PixelChunkSizeInKByte and
 * TotalNumberOfDCCActiveDPP * MetaChunkSize.
 *
 * When GPUVMEnable is set, every active plane additionally contributes
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 * HostVMInefficiencyFactor, where "levels" is 0 unless HostVMEnable is also
 * set.  In that case it is HostVMMaxNonCachedPageTableLevels, reduced by one
 * for HostVMMinPageSize in [2048, 1048576) or by two for larger values
 * (clamped at 0 through dml_max in both reduced cases).
 *
 * Returns the accumulated byte count.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int DynamicLevels = 0;
	double Bytes;
	int plane;

	/* Host VM page-table levels only matter when both VM modes are on. */
	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	Bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable)
		return Bytes;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		Bytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * (1 + 8 * DynamicLevels) * HostVMInefficiencyFactor;

	return Bytes;
}
7015
/*
 * CalculateUrgentLatency - urgent latency, optionally adjusted for the fabric
 * clock.
 *
 * The starting value is dml_max3() of the three supplied urgent latencies.
 * When DoUrgentLatencyAdjustment is set,
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1) is added.
 *
 * Returns the resulting latency.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double Latency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return Latency;

	Latency += UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
	return Latency;
}
7033
7034 static void UseMinimumDCFCLK(
7035 struct display_mode_lib *mode_lib,
7036 int MaxInterDCNTileRepeaters,
7037 int MaxPrefetchMode,
7038 double FinalDRAMClockChangeLatency,
7039 double SREnterPlusExitTime,
7040 int ReturnBusWidth,
7041 int RoundTripPingLatencyCycles,
7042 int ReorderingBytes,
7043 int PixelChunkSizeInKByte,
7044 int MetaChunkSize,
7045 bool GPUVMEnable,
7046 int GPUVMMaxPageTableLevels,
7047 bool HostVMEnable,
7048 int NumberOfActivePlanes,
7049 double HostVMMinPageSize,
7050 int HostVMMaxNonCachedPageTableLevels,
7051 bool DynamicMetadataVMEnabled,
7052 enum immediate_flip_requirement ImmediateFlipRequirement,
7053 bool ProgressiveToInterlaceUnitInOPP,
7054 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
7055 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
7056 int VTotal[],
7057 int VActive[],
7058 int DynamicMetadataTransmittedBytes[],
7059 int DynamicMetadataLinesBeforeActiveRequired[],
7060 bool Interlace[],
7061 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
7062 double RequiredDISPCLK[][2],
7063 double UrgLatency[],
7064 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
7065 double ProjectedDCFCLKDeepSleep[][2],
7066 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
7067 double TotalVActivePixelBandwidth[][2],
7068 double TotalVActiveCursorBandwidth[][2],
7069 double TotalMetaRowBandwidth[][2],
7070 double TotalDPTERowBandwidth[][2],
7071 unsigned int TotalNumberOfActiveDPP[][2],
7072 unsigned int TotalNumberOfDCCActiveDPP[][2],
7073 int dpte_group_bytes[],
7074 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
7075 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
7076 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
7077 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
7078 int BytePerPixelY[],
7079 int BytePerPixelC[],
7080 int HTotal[],
7081 double PixelClock[],
7082 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
7083 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
7084 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
7085 bool DynamicMetadataEnable[],
7086 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
7087 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
7088 double ReadBandwidthLuma[],
7089 double ReadBandwidthChroma[],
7090 double DCFCLKPerState[],
7091 double DCFCLKState[][2])
7092 {
7093 struct vba_vars_st *v = &mode_lib->vba;
7094 int dummy1, i, j, k;
7095 double NormalEfficiency, dummy2, dummy3;
7096 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7097
7098 NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7099 for (i = 0; i < v->soc.num_states; ++i) {
7100 for (j = 0; j <= 1; ++j) {
7101 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7102 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7103 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7104 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7105 double MinimumTWait;
7106 double NonDPTEBandwidth;
7107 double DPTEBandwidth;
7108 double DCFCLKRequiredForAverageBandwidth;
7109 double ExtraLatencyBytes;
7110 double ExtraLatencyCycles;
7111 double DCFCLKRequiredForPeakBandwidth;
7112 int NoOfDPPState[DC__NUM_DPP__MAX];
7113 double MinimumTvmPlus2Tr0;
7114
7115 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7116 for (k = 0; k < NumberOfActivePlanes; ++k) {
7117 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7118 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
7119 }
7120
7121 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7122 NoOfDPPState[k] = NoOfDPP[i][j][k];
7123 }
7124
7125 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
7126 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
7127 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
7128 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
7129 DCFCLKRequiredForAverageBandwidth = dml_max3(
7130 ProjectedDCFCLKDeepSleep[i][j],
7131 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth
7132 / (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7133 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth);
7134
7135 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7136 ReorderingBytes,
7137 TotalNumberOfActiveDPP[i][j],
7138 PixelChunkSizeInKByte,
7139 TotalNumberOfDCCActiveDPP[i][j],
7140 MetaChunkSize,
7141 GPUVMEnable,
7142 HostVMEnable,
7143 NumberOfActivePlanes,
7144 NoOfDPPState,
7145 dpte_group_bytes,
7146 1,
7147 HostVMMinPageSize,
7148 HostVMMaxNonCachedPageTableLevels);
7149 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
7150 for (k = 0; k < NumberOfActivePlanes; ++k) {
7151 double DCFCLKCyclesRequiredInPrefetch;
7152 double ExpectedPrefetchBWAcceleration;
7153 double PrefetchTime;
7154
7155 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
7156 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
7157 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7158 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7159 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth
7160 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7161 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
7162 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k])
7163 / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
7164 DynamicMetadataVMExtraLatency[k] =
7165 (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
7166 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7167 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait
7168 - UrgLatency[i]
7169 * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2)
7170 * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7171 - DynamicMetadataVMExtraLatency[k];
7172
7173 if (PrefetchTime > 0) {
7174 double ExpectedVRatioPrefetch;
7175 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7176 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7177 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7178 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7179 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
7180 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7181 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth;
7182 }
7183 } else {
7184 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
7185 }
7186 if (DynamicMetadataEnable[k] == true) {
7187 double TSetupPipe;
7188 double TdmbfPipe;
7189 double TdmsksPipe;
7190 double TdmecPipe;
7191 double AllowedTimeForUrgentExtraLatency;
7192
7193 CalculateVupdateAndDynamicMetadataParameters(
7194 MaxInterDCNTileRepeaters,
7195 RequiredDPPCLK[i][j][k],
7196 RequiredDISPCLK[i][j],
7197 ProjectedDCFCLKDeepSleep[i][j],
7198 PixelClock[k],
7199 HTotal[k],
7200 VTotal[k] - VActive[k],
7201 DynamicMetadataTransmittedBytes[k],
7202 DynamicMetadataLinesBeforeActiveRequired[k],
7203 Interlace[k],
7204 ProgressiveToInterlaceUnitInOPP,
7205 &TSetupPipe,
7206 &TdmbfPipe,
7207 &TdmecPipe,
7208 &TdmsksPipe,
7209 &dummy1,
7210 &dummy2,
7211 &dummy3);
7212 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7213 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7214 if (AllowedTimeForUrgentExtraLatency > 0) {
7215 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7216 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7217 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7218 } else {
7219 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
7220 }
7221 }
7222 }
7223 DCFCLKRequiredForPeakBandwidth = 0;
7224 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7225 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7226 }
7227 MinimumTvmPlus2Tr0 = UrgLatency[i]
7228 * (GPUVMEnable == true ?
7229 (HostVMEnable == true ?
7230 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) :
7231 0);
7232 for (k = 0; k < NumberOfActivePlanes; ++k) {
7233 double MaximumTvmPlus2Tr0PlusTsw;
7234 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7235 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7236 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
7237 } else {
7238 DCFCLKRequiredForPeakBandwidth = dml_max3(
7239 DCFCLKRequiredForPeakBandwidth,
7240 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7241 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7242 }
7243 }
7244 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7245 }
7246 }
7247 }
7248
7249 static void CalculateUnboundedRequestAndCompressedBufferSize(
7250 unsigned int DETBufferSizeInKByte,
7251 int ConfigReturnBufferSizeInKByte,
7252 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7253 int TotalActiveDPP,
7254 bool NoChromaPlanes,
7255 int MaxNumDPP,
7256 int CompressedBufferSegmentSizeInkByteFinal,
7257 enum output_encoder_class *Output,
7258 bool *UnboundedRequestEnabled,
7259 int *CompressedBufferSizeInkByte)
7260 {
7261 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7262
7263 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7264 *CompressedBufferSizeInkByte = (
7265 *UnboundedRequestEnabled == true ?
7266 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7267 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7268 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7269
7270 #ifdef __DML_VBA_DEBUG__
7271 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7272 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7273 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7274 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7275 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7276 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7277 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7278 #endif
7279 }
7280
7281 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7282 {
7283 bool ret_val = false;
7284
7285 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7286 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
7287 ret_val = false;
7288 }
7289 return (ret_val);
7290 }
7291
7292