1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27 #define UNIT_TEST 0
28 #if !UNIT_TEST
29 #include "dc.h"
30 #include "dc_link.h"
31 #endif
32 #include "../display_mode_lib.h"
33 #include "display_mode_vba_314.h"
34 #include "../dml_inline_defs.h"
35
36 /*
37 * NOTE:
38 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
39 *
40 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
41 * ways. Unless there is something clearly wrong with it the code should
42 * remain as-is as it provides us with a guarantee from HW that it is correct.
43 */
44
45 #define BPP_INVALID 0
46 #define BPP_BLENDED_PIPE 0xffffffff
47 #define DCN314_MAX_DSC_IMAGE_WIDTH 5184
48 #define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
49
// For DML-C changes that haven't been propagated to VBA yet
51 //#define __DML_VBA_ALLOW_DELTA__
52
53 // Move these to ip parameters/constant
54
// The vstartup value at which DML starts trying to see whether the mode can be supported
56 #define __DML_VBA_MIN_VSTARTUP__ 9
57
58 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
59 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
60
// fudge factor for min dcfclk calculation
62 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
63
64 typedef struct {
65 double DPPCLK;
66 double DISPCLK;
67 double PixelClock;
68 double DCFCLKDeepSleep;
69 unsigned int DPPPerPlane;
70 bool ScalerEnabled;
71 double VRatio;
72 double VRatioChroma;
73 enum scan_direction_class SourceScan;
74 unsigned int BlockWidth256BytesY;
75 unsigned int BlockHeight256BytesY;
76 unsigned int BlockWidth256BytesC;
77 unsigned int BlockHeight256BytesC;
78 unsigned int InterlaceEnable;
79 unsigned int NumberOfCursors;
80 unsigned int VBlank;
81 unsigned int HTotal;
82 unsigned int DCCEnable;
83 bool ODMCombineIsEnabled;
84 enum source_format_class SourcePixelFormat;
85 int BytePerPixelY;
86 int BytePerPixelC;
87 bool ProgressiveToInterlaceUnitInOPP;
88 } Pipe;
89
90 #define BPP_INVALID 0
91 #define BPP_BLENDED_PIPE 0xffffffff
92
93 static bool CalculateBytePerPixelAnd256BBlockSizes(
94 enum source_format_class SourcePixelFormat,
95 enum dm_swizzle_mode SurfaceTiling,
96 unsigned int *BytePerPixelY,
97 unsigned int *BytePerPixelC,
98 double *BytePerPixelDETY,
99 double *BytePerPixelDETC,
100 unsigned int *BlockHeight256BytesY,
101 unsigned int *BlockHeight256BytesC,
102 unsigned int *BlockWidth256BytesY,
103 unsigned int *BlockWidth256BytesC);
104 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
105 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
106 static unsigned int dscceComputeDelay(
107 unsigned int bpc,
108 double BPP,
109 unsigned int sliceWidth,
110 unsigned int numSlices,
111 enum output_format_class pixelFormat,
112 enum output_encoder_class Output);
113 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
114 static bool CalculatePrefetchSchedule(
115 struct display_mode_lib *mode_lib,
116 double HostVMInefficiencyFactor,
117 Pipe *myPipe,
118 unsigned int DSCDelay,
119 double DPPCLKDelaySubtotalPlusCNVCFormater,
120 double DPPCLKDelaySCL,
121 double DPPCLKDelaySCLLBOnly,
122 double DPPCLKDelayCNVCCursor,
123 double DISPCLKDelaySubtotal,
124 unsigned int DPP_RECOUT_WIDTH,
125 enum output_format_class OutputFormat,
126 unsigned int MaxInterDCNTileRepeaters,
127 unsigned int VStartup,
128 unsigned int MaxVStartup,
129 unsigned int GPUVMPageTableLevels,
130 bool GPUVMEnable,
131 bool HostVMEnable,
132 unsigned int HostVMMaxNonCachedPageTableLevels,
133 double HostVMMinPageSize,
134 bool DynamicMetadataEnable,
135 bool DynamicMetadataVMEnabled,
136 int DynamicMetadataLinesBeforeActiveRequired,
137 unsigned int DynamicMetadataTransmittedBytes,
138 double UrgentLatency,
139 double UrgentExtraLatency,
140 double TCalc,
141 unsigned int PDEAndMetaPTEBytesFrame,
142 unsigned int MetaRowByte,
143 unsigned int PixelPTEBytesPerRow,
144 double PrefetchSourceLinesY,
145 unsigned int SwathWidthY,
146 double VInitPreFillY,
147 unsigned int MaxNumSwathY,
148 double PrefetchSourceLinesC,
149 unsigned int SwathWidthC,
150 double VInitPreFillC,
151 unsigned int MaxNumSwathC,
152 int swath_width_luma_ub,
153 int swath_width_chroma_ub,
154 unsigned int SwathHeightY,
155 unsigned int SwathHeightC,
156 double TWait,
157 double *DSTXAfterScaler,
158 double *DSTYAfterScaler,
159 double *DestinationLinesForPrefetch,
160 double *PrefetchBandwidth,
161 double *DestinationLinesToRequestVMInVBlank,
162 double *DestinationLinesToRequestRowInVBlank,
163 double *VRatioPrefetchY,
164 double *VRatioPrefetchC,
165 double *RequiredPrefetchPixDataBWLuma,
166 double *RequiredPrefetchPixDataBWChroma,
167 bool *NotEnoughTimeForDynamicMetadata,
168 double *Tno_bw,
169 double *prefetch_vmrow_bw,
170 double *Tdmdl_vm,
171 double *Tdmdl,
172 double *TSetup,
173 int *VUpdateOffsetPix,
174 double *VUpdateWidthPix,
175 double *VReadyOffsetPix);
176 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
177 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
178 static void CalculateDCCConfiguration(
179 bool DCCEnabled,
180 bool DCCProgrammingAssumesScanDirectionUnknown,
181 enum source_format_class SourcePixelFormat,
182 unsigned int SurfaceWidthLuma,
183 unsigned int SurfaceWidthChroma,
184 unsigned int SurfaceHeightLuma,
185 unsigned int SurfaceHeightChroma,
186 double DETBufferSize,
187 unsigned int RequestHeight256ByteLuma,
188 unsigned int RequestHeight256ByteChroma,
189 enum dm_swizzle_mode TilingFormat,
190 unsigned int BytePerPixelY,
191 unsigned int BytePerPixelC,
192 double BytePerPixelDETY,
193 double BytePerPixelDETC,
194 enum scan_direction_class ScanOrientation,
195 unsigned int *MaxUncompressedBlockLuma,
196 unsigned int *MaxUncompressedBlockChroma,
197 unsigned int *MaxCompressedBlockLuma,
198 unsigned int *MaxCompressedBlockChroma,
199 unsigned int *IndependentBlockLuma,
200 unsigned int *IndependentBlockChroma);
201 static double CalculatePrefetchSourceLines(
202 struct display_mode_lib *mode_lib,
203 double VRatio,
204 double vtaps,
205 bool Interlace,
206 bool ProgressiveToInterlaceUnitInOPP,
207 unsigned int SwathHeight,
208 unsigned int ViewportYStart,
209 double *VInitPreFill,
210 unsigned int *MaxNumSwath);
211 static unsigned int CalculateVMAndRowBytes(
212 struct display_mode_lib *mode_lib,
213 bool DCCEnable,
214 unsigned int BlockHeight256Bytes,
215 unsigned int BlockWidth256Bytes,
216 enum source_format_class SourcePixelFormat,
217 unsigned int SurfaceTiling,
218 unsigned int BytePerPixel,
219 enum scan_direction_class ScanDirection,
220 unsigned int SwathWidth,
221 unsigned int ViewportHeight,
222 bool GPUVMEnable,
223 bool HostVMEnable,
224 unsigned int HostVMMaxNonCachedPageTableLevels,
225 unsigned int GPUVMMinPageSize,
226 unsigned int HostVMMinPageSize,
227 unsigned int PTEBufferSizeInRequests,
228 unsigned int Pitch,
229 unsigned int DCCMetaPitch,
230 unsigned int *MacroTileWidth,
231 unsigned int *MetaRowByte,
232 unsigned int *PixelPTEBytesPerRow,
233 bool *PTEBufferSizeNotExceeded,
234 int *dpte_row_width_ub,
235 unsigned int *dpte_row_height,
236 unsigned int *MetaRequestWidth,
237 unsigned int *MetaRequestHeight,
238 unsigned int *meta_row_width,
239 unsigned int *meta_row_height,
240 int *vm_group_bytes,
241 unsigned int *dpte_group_bytes,
242 unsigned int *PixelPTEReqWidth,
243 unsigned int *PixelPTEReqHeight,
244 unsigned int *PTERequestSize,
245 int *DPDE0BytesFrame,
246 int *MetaPTEBytesFrame);
247 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
248 static void CalculateRowBandwidth(
249 bool GPUVMEnable,
250 enum source_format_class SourcePixelFormat,
251 double VRatio,
252 double VRatioChroma,
253 bool DCCEnable,
254 double LineTime,
255 unsigned int MetaRowByteLuma,
256 unsigned int MetaRowByteChroma,
257 unsigned int meta_row_height_luma,
258 unsigned int meta_row_height_chroma,
259 unsigned int PixelPTEBytesPerRowLuma,
260 unsigned int PixelPTEBytesPerRowChroma,
261 unsigned int dpte_row_height_luma,
262 unsigned int dpte_row_height_chroma,
263 double *meta_row_bw,
264 double *dpte_row_bw);
265
266 static void CalculateFlipSchedule(
267 struct display_mode_lib *mode_lib,
268 unsigned int k,
269 double HostVMInefficiencyFactor,
270 double UrgentExtraLatency,
271 double UrgentLatency,
272 double PDEAndMetaPTEBytesPerFrame,
273 double MetaRowBytes,
274 double DPTEBytesPerRow);
275 static double CalculateWriteBackDelay(
276 enum source_format_class WritebackPixelFormat,
277 double WritebackHRatio,
278 double WritebackVRatio,
279 unsigned int WritebackVTaps,
280 int WritebackDestinationWidth,
281 int WritebackDestinationHeight,
282 int WritebackSourceHeight,
283 unsigned int HTotal);
284
285 static void CalculateVupdateAndDynamicMetadataParameters(
286 int MaxInterDCNTileRepeaters,
287 double DPPCLK,
288 double DISPCLK,
289 double DCFClkDeepSleep,
290 double PixelClock,
291 int HTotal,
292 int VBlank,
293 int DynamicMetadataTransmittedBytes,
294 int DynamicMetadataLinesBeforeActiveRequired,
295 int InterlaceEnable,
296 bool ProgressiveToInterlaceUnitInOPP,
297 double *TSetup,
298 double *Tdmbf,
299 double *Tdmec,
300 double *Tdmsks,
301 int *VUpdateOffsetPix,
302 double *VUpdateWidthPix,
303 double *VReadyOffsetPix);
304
305 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
306 struct display_mode_lib *mode_lib,
307 unsigned int PrefetchMode,
308 double DCFCLK,
309 double ReturnBW,
310 double UrgentLatency,
311 double ExtraLatency,
312 double SOCCLK,
313 double DCFCLKDeepSleep,
314 unsigned int DETBufferSizeY[],
315 unsigned int DETBufferSizeC[],
316 unsigned int SwathHeightY[],
317 unsigned int SwathHeightC[],
318 double SwathWidthY[],
319 double SwathWidthC[],
320 unsigned int DPPPerPlane[],
321 double BytePerPixelDETY[],
322 double BytePerPixelDETC[],
323 bool UnboundedRequestEnabled,
324 unsigned int CompressedBufferSizeInkByte,
325 enum clock_change_support *DRAMClockChangeSupport,
326 double *StutterExitWatermark,
327 double *StutterEnterPlusExitWatermark,
328 double *Z8StutterExitWatermark,
329 double *Z8StutterEnterPlusExitWatermark);
330
331 static void CalculateDCFCLKDeepSleep(
332 struct display_mode_lib *mode_lib,
333 unsigned int NumberOfActivePlanes,
334 int BytePerPixelY[],
335 int BytePerPixelC[],
336 double VRatio[],
337 double VRatioChroma[],
338 double SwathWidthY[],
339 double SwathWidthC[],
340 unsigned int DPPPerPlane[],
341 double HRatio[],
342 double HRatioChroma[],
343 double PixelClock[],
344 double PSCL_THROUGHPUT[],
345 double PSCL_THROUGHPUT_CHROMA[],
346 double DPPCLK[],
347 double ReadBandwidthLuma[],
348 double ReadBandwidthChroma[],
349 int ReturnBusWidth,
350 double *DCFCLKDeepSleep);
351
352 static void CalculateUrgentBurstFactor(
353 int swath_width_luma_ub,
354 int swath_width_chroma_ub,
355 unsigned int SwathHeightY,
356 unsigned int SwathHeightC,
357 double LineTime,
358 double UrgentLatency,
359 double CursorBufferSize,
360 unsigned int CursorWidth,
361 unsigned int CursorBPP,
362 double VRatio,
363 double VRatioC,
364 double BytePerPixelInDETY,
365 double BytePerPixelInDETC,
366 double DETBufferSizeY,
367 double DETBufferSizeC,
368 double *UrgentBurstFactorCursor,
369 double *UrgentBurstFactorLuma,
370 double *UrgentBurstFactorChroma,
371 bool *NotEnoughUrgentLatencyHiding);
372
373 static void UseMinimumDCFCLK(
374 struct display_mode_lib *mode_lib,
375 int MaxPrefetchMode,
376 int ReorderingBytes);
377
378 static void CalculatePixelDeliveryTimes(
379 unsigned int NumberOfActivePlanes,
380 double VRatio[],
381 double VRatioChroma[],
382 double VRatioPrefetchY[],
383 double VRatioPrefetchC[],
384 unsigned int swath_width_luma_ub[],
385 unsigned int swath_width_chroma_ub[],
386 unsigned int DPPPerPlane[],
387 double HRatio[],
388 double HRatioChroma[],
389 double PixelClock[],
390 double PSCL_THROUGHPUT[],
391 double PSCL_THROUGHPUT_CHROMA[],
392 double DPPCLK[],
393 int BytePerPixelC[],
394 enum scan_direction_class SourceScan[],
395 unsigned int NumberOfCursors[],
396 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
397 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
398 unsigned int BlockWidth256BytesY[],
399 unsigned int BlockHeight256BytesY[],
400 unsigned int BlockWidth256BytesC[],
401 unsigned int BlockHeight256BytesC[],
402 double DisplayPipeLineDeliveryTimeLuma[],
403 double DisplayPipeLineDeliveryTimeChroma[],
404 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
405 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
406 double DisplayPipeRequestDeliveryTimeLuma[],
407 double DisplayPipeRequestDeliveryTimeChroma[],
408 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
409 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
410 double CursorRequestDeliveryTime[],
411 double CursorRequestDeliveryTimePrefetch[]);
412
413 static void CalculateMetaAndPTETimes(
414 int NumberOfActivePlanes,
415 bool GPUVMEnable,
416 int MetaChunkSize,
417 int MinMetaChunkSizeBytes,
418 int HTotal[],
419 double VRatio[],
420 double VRatioChroma[],
421 double DestinationLinesToRequestRowInVBlank[],
422 double DestinationLinesToRequestRowInImmediateFlip[],
423 bool DCCEnable[],
424 double PixelClock[],
425 int BytePerPixelY[],
426 int BytePerPixelC[],
427 enum scan_direction_class SourceScan[],
428 int dpte_row_height[],
429 int dpte_row_height_chroma[],
430 int meta_row_width[],
431 int meta_row_width_chroma[],
432 int meta_row_height[],
433 int meta_row_height_chroma[],
434 int meta_req_width[],
435 int meta_req_width_chroma[],
436 int meta_req_height[],
437 int meta_req_height_chroma[],
438 int dpte_group_bytes[],
439 int PTERequestSizeY[],
440 int PTERequestSizeC[],
441 int PixelPTEReqWidthY[],
442 int PixelPTEReqHeightY[],
443 int PixelPTEReqWidthC[],
444 int PixelPTEReqHeightC[],
445 int dpte_row_width_luma_ub[],
446 int dpte_row_width_chroma_ub[],
447 double DST_Y_PER_PTE_ROW_NOM_L[],
448 double DST_Y_PER_PTE_ROW_NOM_C[],
449 double DST_Y_PER_META_ROW_NOM_L[],
450 double DST_Y_PER_META_ROW_NOM_C[],
451 double TimePerMetaChunkNominal[],
452 double TimePerChromaMetaChunkNominal[],
453 double TimePerMetaChunkVBlank[],
454 double TimePerChromaMetaChunkVBlank[],
455 double TimePerMetaChunkFlip[],
456 double TimePerChromaMetaChunkFlip[],
457 double time_per_pte_group_nom_luma[],
458 double time_per_pte_group_vblank_luma[],
459 double time_per_pte_group_flip_luma[],
460 double time_per_pte_group_nom_chroma[],
461 double time_per_pte_group_vblank_chroma[],
462 double time_per_pte_group_flip_chroma[]);
463
464 static void CalculateVMGroupAndRequestTimes(
465 unsigned int NumberOfActivePlanes,
466 bool GPUVMEnable,
467 unsigned int GPUVMMaxPageTableLevels,
468 unsigned int HTotal[],
469 int BytePerPixelC[],
470 double DestinationLinesToRequestVMInVBlank[],
471 double DestinationLinesToRequestVMInImmediateFlip[],
472 bool DCCEnable[],
473 double PixelClock[],
474 int dpte_row_width_luma_ub[],
475 int dpte_row_width_chroma_ub[],
476 int vm_group_bytes[],
477 unsigned int dpde0_bytes_per_frame_ub_l[],
478 unsigned int dpde0_bytes_per_frame_ub_c[],
479 int meta_pte_bytes_per_frame_ub_l[],
480 int meta_pte_bytes_per_frame_ub_c[],
481 double TimePerVMGroupVBlank[],
482 double TimePerVMGroupFlip[],
483 double TimePerVMRequestVBlank[],
484 double TimePerVMRequestFlip[]);
485
486 static void CalculateStutterEfficiency(
487 struct display_mode_lib *mode_lib,
488 int CompressedBufferSizeInkByte,
489 bool UnboundedRequestEnabled,
490 int ConfigReturnBufferSizeInKByte,
491 int MetaFIFOSizeInKEntries,
492 int ZeroSizeBufferEntries,
493 int NumberOfActivePlanes,
494 int ROBBufferSizeInKByte,
495 double TotalDataReadBandwidth,
496 double DCFCLK,
497 double ReturnBW,
498 double COMPBUF_RESERVED_SPACE_64B,
499 double COMPBUF_RESERVED_SPACE_ZS,
500 double SRExitTime,
501 double SRExitZ8Time,
502 bool SynchronizedVBlank,
503 double Z8StutterEnterPlusExitWatermark,
504 double StutterEnterPlusExitWatermark,
505 bool ProgressiveToInterlaceUnitInOPP,
506 bool Interlace[],
507 double MinTTUVBlank[],
508 int DPPPerPlane[],
509 unsigned int DETBufferSizeY[],
510 int BytePerPixelY[],
511 double BytePerPixelDETY[],
512 double SwathWidthY[],
513 int SwathHeightY[],
514 int SwathHeightC[],
515 double NetDCCRateLuma[],
516 double NetDCCRateChroma[],
517 double DCCFractionOfZeroSizeRequestsLuma[],
518 double DCCFractionOfZeroSizeRequestsChroma[],
519 int HTotal[],
520 int VTotal[],
521 double PixelClock[],
522 double VRatio[],
523 enum scan_direction_class SourceScan[],
524 int BlockHeight256BytesY[],
525 int BlockWidth256BytesY[],
526 int BlockHeight256BytesC[],
527 int BlockWidth256BytesC[],
528 int DCCYMaxUncompressedBlock[],
529 int DCCCMaxUncompressedBlock[],
530 int VActive[],
531 bool DCCEnable[],
532 bool WritebackEnable[],
533 double ReadBandwidthPlaneLuma[],
534 double ReadBandwidthPlaneChroma[],
535 double meta_row_bw[],
536 double dpte_row_bw[],
537 double *StutterEfficiencyNotIncludingVBlank,
538 double *StutterEfficiency,
539 int *NumberOfStutterBurstsPerFrame,
540 double *Z8StutterEfficiencyNotIncludingVBlank,
541 double *Z8StutterEfficiency,
542 int *Z8NumberOfStutterBurstsPerFrame,
543 double *StutterPeriod);
544
545 static void CalculateSwathAndDETConfiguration(
546 bool ForceSingleDPP,
547 int NumberOfActivePlanes,
548 unsigned int DETBufferSizeInKByte,
549 double MaximumSwathWidthLuma[],
550 double MaximumSwathWidthChroma[],
551 enum scan_direction_class SourceScan[],
552 enum source_format_class SourcePixelFormat[],
553 enum dm_swizzle_mode SurfaceTiling[],
554 int ViewportWidth[],
555 int ViewportHeight[],
556 int SurfaceWidthY[],
557 int SurfaceWidthC[],
558 int SurfaceHeightY[],
559 int SurfaceHeightC[],
560 int Read256BytesBlockHeightY[],
561 int Read256BytesBlockHeightC[],
562 int Read256BytesBlockWidthY[],
563 int Read256BytesBlockWidthC[],
564 enum odm_combine_mode ODMCombineEnabled[],
565 int BlendingAndTiming[],
566 int BytePerPixY[],
567 int BytePerPixC[],
568 double BytePerPixDETY[],
569 double BytePerPixDETC[],
570 int HActive[],
571 double HRatio[],
572 double HRatioChroma[],
573 int DPPPerPlane[],
574 int swath_width_luma_ub[],
575 int swath_width_chroma_ub[],
576 double SwathWidth[],
577 double SwathWidthChroma[],
578 int SwathHeightY[],
579 int SwathHeightC[],
580 unsigned int DETBufferSizeY[],
581 unsigned int DETBufferSizeC[],
582 bool ViewportSizeSupportPerPlane[],
583 bool *ViewportSizeSupport);
584 static void CalculateSwathWidth(
585 bool ForceSingleDPP,
586 int NumberOfActivePlanes,
587 enum source_format_class SourcePixelFormat[],
588 enum scan_direction_class SourceScan[],
589 int ViewportWidth[],
590 int ViewportHeight[],
591 int SurfaceWidthY[],
592 int SurfaceWidthC[],
593 int SurfaceHeightY[],
594 int SurfaceHeightC[],
595 enum odm_combine_mode ODMCombineEnabled[],
596 int BytePerPixY[],
597 int BytePerPixC[],
598 int Read256BytesBlockHeightY[],
599 int Read256BytesBlockHeightC[],
600 int Read256BytesBlockWidthY[],
601 int Read256BytesBlockWidthC[],
602 int BlendingAndTiming[],
603 int HActive[],
604 double HRatio[],
605 int DPPPerPlane[],
606 double SwathWidthSingleDPPY[],
607 double SwathWidthSingleDPPC[],
608 double SwathWidthY[],
609 double SwathWidthC[],
610 int MaximumSwathHeightY[],
611 int MaximumSwathHeightC[],
612 int swath_width_luma_ub[],
613 int swath_width_chroma_ub[]);
614
615 static double CalculateExtraLatency(
616 int RoundTripPingLatencyCycles,
617 int ReorderingBytes,
618 double DCFCLK,
619 int TotalNumberOfActiveDPP,
620 int PixelChunkSizeInKByte,
621 int TotalNumberOfDCCActiveDPP,
622 int MetaChunkSize,
623 double ReturnBW,
624 bool GPUVMEnable,
625 bool HostVMEnable,
626 int NumberOfActivePlanes,
627 int NumberOfDPP[],
628 int dpte_group_bytes[],
629 double HostVMInefficiencyFactor,
630 double HostVMMinPageSize,
631 int HostVMMaxNonCachedPageTableLevels);
632
633 static double CalculateExtraLatencyBytes(
634 int ReorderingBytes,
635 int TotalNumberOfActiveDPP,
636 int PixelChunkSizeInKByte,
637 int TotalNumberOfDCCActiveDPP,
638 int MetaChunkSize,
639 bool GPUVMEnable,
640 bool HostVMEnable,
641 int NumberOfActivePlanes,
642 int NumberOfDPP[],
643 int dpte_group_bytes[],
644 double HostVMInefficiencyFactor,
645 double HostVMMinPageSize,
646 int HostVMMaxNonCachedPageTableLevels);
647
648 static double CalculateUrgentLatency(
649 double UrgentLatencyPixelDataOnly,
650 double UrgentLatencyPixelMixedWithVMData,
651 double UrgentLatencyVMDataOnly,
652 bool DoUrgentLatencyAdjustment,
653 double UrgentLatencyAdjustmentFabricClockComponent,
654 double UrgentLatencyAdjustmentFabricClockReference,
655 double FabricClockSingle);
656
657 static void CalculateUnboundedRequestAndCompressedBufferSize(
658 unsigned int DETBufferSizeInKByte,
659 int ConfigReturnBufferSizeInKByte,
660 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
661 int TotalActiveDPP,
662 bool NoChromaPlanes,
663 int MaxNumDPP,
664 int CompressedBufferSegmentSizeInkByteFinal,
665 enum output_encoder_class *Output,
666 bool *UnboundedRequestEnabled,
667 int *CompressedBufferSizeInkByte);
668
669 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
670 static unsigned int CalculateMaxVStartup(
671 unsigned int VTotal,
672 unsigned int VActive,
673 unsigned int VBlankNom,
674 unsigned int HTotal,
675 double PixelClock,
676 bool ProgressiveTointerlaceUnitinOPP,
677 bool Interlace,
678 unsigned int VBlankNomDefaultUS,
679 double WritebackDelayTime);
680
/*
 * dml314_recalculate() - run the DCN3.14 recalculation sequence on @mode_lib.
 *
 * Invokes, in this fixed order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Every step receives the same @mode_lib pointer; nothing is returned. The
 * order is kept exactly as provided -- do not reorder the calls.
 */
void dml314_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	/* Debug builds only: announce the final (and longest) stage. */
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
691
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)692 static unsigned int dscceComputeDelay(
693 unsigned int bpc,
694 double BPP,
695 unsigned int sliceWidth,
696 unsigned int numSlices,
697 enum output_format_class pixelFormat,
698 enum output_encoder_class Output)
699 {
700 // valid bpc = source bits per component in the set of {8, 10, 12}
701 // valid bpp = increments of 1/16 of a bit
702 // min = 6/7/8 in N420/N422/444, respectively
703 // max = such that compression is 1:1
704 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
705 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
706 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
707
708 // fixed value
709 unsigned int rcModelSize = 8192;
710
711 // N422/N420 operate at 2 pixels per clock
712 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
713
714 if (pixelFormat == dm_420)
715 pixelsPerClock = 2;
716 else if (pixelFormat == dm_444)
717 pixelsPerClock = 1;
718 else if (pixelFormat == dm_n422)
719 pixelsPerClock = 2;
720 // #all other modes operate at 1 pixel per clock
721 else
722 pixelsPerClock = 1;
723
724 //initial transmit delay as per PPS
725 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
726
727 //compute ssm delay
728 if (bpc == 8)
729 D = 81;
730 else if (bpc == 10)
731 D = 89;
732 else
733 D = 113;
734
735 //divide by pixel per cycle to compute slice width as seen by DSC
736 w = sliceWidth / pixelsPerClock;
737
738 //422 mode has an additional cycle of delay
739 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
740 s = 0;
741 else
742 s = 1;
743
744 //main calculation for the dscce
745 ix = initalXmitDelay + 45;
746 wx = (w + 2) / 3;
747 P = 3 * wx - w;
748 l0 = ix / w;
749 a = ix + P * l0;
750 ax = (a + 2) / 3 + D + 6 + 1;
751 L = (ax + wx - 1) / wx;
752 if ((ix % w) == 0 && P != 0)
753 lstall = 1;
754 else
755 lstall = 0;
756 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
757
758 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
759 pixels = Delay * 3 * pixelsPerClock;
760 return pixels;
761 }
762
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)763 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
764 {
765 unsigned int Delay = 0;
766
767 if (pixelFormat == dm_420) {
768 // sfr
769 Delay = Delay + 2;
770 // dsccif
771 Delay = Delay + 0;
772 // dscc - input deserializer
773 Delay = Delay + 3;
774 // dscc gets pixels every other cycle
775 Delay = Delay + 2;
776 // dscc - input cdc fifo
777 Delay = Delay + 12;
778 // dscc gets pixels every other cycle
779 Delay = Delay + 13;
780 // dscc - cdc uncertainty
781 Delay = Delay + 2;
782 // dscc - output cdc fifo
783 Delay = Delay + 7;
784 // dscc gets pixels every other cycle
785 Delay = Delay + 3;
786 // dscc - cdc uncertainty
787 Delay = Delay + 2;
788 // dscc - output serializer
789 Delay = Delay + 1;
790 // sft
791 Delay = Delay + 1;
792 } else if (pixelFormat == dm_n422) {
793 // sfr
794 Delay = Delay + 2;
795 // dsccif
796 Delay = Delay + 1;
797 // dscc - input deserializer
798 Delay = Delay + 5;
799 // dscc - input cdc fifo
800 Delay = Delay + 25;
801 // dscc - cdc uncertainty
802 Delay = Delay + 2;
803 // dscc - output cdc fifo
804 Delay = Delay + 10;
805 // dscc - cdc uncertainty
806 Delay = Delay + 2;
807 // dscc - output serializer
808 Delay = Delay + 1;
809 // sft
810 Delay = Delay + 1;
811 } else {
812 // sfr
813 Delay = Delay + 2;
814 // dsccif
815 Delay = Delay + 0;
816 // dscc - input deserializer
817 Delay = Delay + 3;
818 // dscc - input cdc fifo
819 Delay = Delay + 12;
820 // dscc - cdc uncertainty
821 Delay = Delay + 2;
822 // dscc - output cdc fifo
823 Delay = Delay + 7;
824 // dscc - output serializer
825 Delay = Delay + 1;
826 // dscc - cdc uncertainty
827 Delay = Delay + 2;
828 // sft
829 Delay = Delay + 1;
830 }
831
832 return Delay;
833 }
834
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)835 static bool CalculatePrefetchSchedule(
836 struct display_mode_lib *mode_lib,
837 double HostVMInefficiencyFactor,
838 Pipe *myPipe,
839 unsigned int DSCDelay,
840 double DPPCLKDelaySubtotalPlusCNVCFormater,
841 double DPPCLKDelaySCL,
842 double DPPCLKDelaySCLLBOnly,
843 double DPPCLKDelayCNVCCursor,
844 double DISPCLKDelaySubtotal,
845 unsigned int DPP_RECOUT_WIDTH,
846 enum output_format_class OutputFormat,
847 unsigned int MaxInterDCNTileRepeaters,
848 unsigned int VStartup,
849 unsigned int MaxVStartup,
850 unsigned int GPUVMPageTableLevels,
851 bool GPUVMEnable,
852 bool HostVMEnable,
853 unsigned int HostVMMaxNonCachedPageTableLevels,
854 double HostVMMinPageSize,
855 bool DynamicMetadataEnable,
856 bool DynamicMetadataVMEnabled,
857 int DynamicMetadataLinesBeforeActiveRequired,
858 unsigned int DynamicMetadataTransmittedBytes,
859 double UrgentLatency,
860 double UrgentExtraLatency,
861 double TCalc,
862 unsigned int PDEAndMetaPTEBytesFrame,
863 unsigned int MetaRowByte,
864 unsigned int PixelPTEBytesPerRow,
865 double PrefetchSourceLinesY,
866 unsigned int SwathWidthY,
867 double VInitPreFillY,
868 unsigned int MaxNumSwathY,
869 double PrefetchSourceLinesC,
870 unsigned int SwathWidthC,
871 double VInitPreFillC,
872 unsigned int MaxNumSwathC,
873 int swath_width_luma_ub,
874 int swath_width_chroma_ub,
875 unsigned int SwathHeightY,
876 unsigned int SwathHeightC,
877 double TWait,
878 double *DSTXAfterScaler,
879 double *DSTYAfterScaler,
880 double *DestinationLinesForPrefetch,
881 double *PrefetchBandwidth,
882 double *DestinationLinesToRequestVMInVBlank,
883 double *DestinationLinesToRequestRowInVBlank,
884 double *VRatioPrefetchY,
885 double *VRatioPrefetchC,
886 double *RequiredPrefetchPixDataBWLuma,
887 double *RequiredPrefetchPixDataBWChroma,
888 bool *NotEnoughTimeForDynamicMetadata,
889 double *Tno_bw,
890 double *prefetch_vmrow_bw,
891 double *Tdmdl_vm,
892 double *Tdmdl,
893 double *TSetup,
894 int *VUpdateOffsetPix,
895 double *VUpdateWidthPix,
896 double *VReadyOffsetPix)
897 {
898 bool MyError = false;
899 unsigned int DPPCycles, DISPCLKCycles;
900 double DSTTotalPixelsAfterScaler;
901 double LineTime;
902 double dst_y_prefetch_equ;
903 double Tsw_oto;
904 double prefetch_bw_oto;
905 double prefetch_bw_pr;
906 double Tvm_oto;
907 double Tr0_oto;
908 double Tvm_oto_lines;
909 double Tr0_oto_lines;
910 double dst_y_prefetch_oto;
911 double TimeForFetchingMetaPTE = 0;
912 double TimeForFetchingRowInVBlank = 0;
913 double LinesToRequestPrefetchPixelData = 0;
914 unsigned int HostVMDynamicLevelsTrips;
915 double trip_to_mem;
916 double Tvm_trips;
917 double Tr0_trips;
918 double Tvm_trips_rounded;
919 double Tr0_trips_rounded;
920 double Lsw_oto;
921 double Tpre_rounded;
922 double prefetch_bw_equ;
923 double Tvm_equ;
924 double Tr0_equ;
925 double Tdmbf;
926 double Tdmec;
927 double Tdmsks;
928 double prefetch_sw_bytes;
929 double bytes_pp;
930 double dep_bytes;
931 int max_vratio_pre = 4;
932 double min_Lsw;
933 double Tsw_est1 = 0;
934 double Tsw_est3 = 0;
935 double max_Tsw = 0;
936
937 if (GPUVMEnable == true && HostVMEnable == true) {
938 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
939 } else {
940 HostVMDynamicLevelsTrips = 0;
941 }
942 #ifdef __DML_VBA_DEBUG__
943 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
944 #endif
945 CalculateVupdateAndDynamicMetadataParameters(
946 MaxInterDCNTileRepeaters,
947 myPipe->DPPCLK,
948 myPipe->DISPCLK,
949 myPipe->DCFCLKDeepSleep,
950 myPipe->PixelClock,
951 myPipe->HTotal,
952 myPipe->VBlank,
953 DynamicMetadataTransmittedBytes,
954 DynamicMetadataLinesBeforeActiveRequired,
955 myPipe->InterlaceEnable,
956 myPipe->ProgressiveToInterlaceUnitInOPP,
957 TSetup,
958 &Tdmbf,
959 &Tdmec,
960 &Tdmsks,
961 VUpdateOffsetPix,
962 VUpdateWidthPix,
963 VReadyOffsetPix);
964
965 LineTime = myPipe->HTotal / myPipe->PixelClock;
966 trip_to_mem = UrgentLatency;
967 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
968
969 #ifdef __DML_VBA_ALLOW_DELTA__
970 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
971 #else
972 if (DynamicMetadataVMEnabled == true) {
973 #endif
974 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
975 } else {
976 *Tdmdl = TWait + UrgentExtraLatency;
977 }
978
979 #ifdef __DML_VBA_ALLOW_DELTA__
980 if (DynamicMetadataEnable == false) {
981 *Tdmdl = 0.0;
982 }
983 #endif
984
985 if (DynamicMetadataEnable == true) {
986 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
987 *NotEnoughTimeForDynamicMetadata = true;
988 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
989 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
990 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
991 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
992 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
993 } else {
994 *NotEnoughTimeForDynamicMetadata = false;
995 }
996 } else {
997 *NotEnoughTimeForDynamicMetadata = false;
998 }
999
1000 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1001
1002 if (myPipe->ScalerEnabled)
1003 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1004 else
1005 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1006
1007 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1008
1009 DISPCLKCycles = DISPCLKDelaySubtotal;
1010
1011 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1012 return true;
1013
1014 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1015
1016 #ifdef __DML_VBA_DEBUG__
1017 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1018 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1019 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1020 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1021 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1022 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1023 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1024 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1025 #endif
1026
1027 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1028
1029 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1030 *DSTYAfterScaler = 1;
1031 else
1032 *DSTYAfterScaler = 0;
1033
1034 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1035 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1036 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1037
1038 #ifdef __DML_VBA_DEBUG__
1039 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1040 #endif
1041
1042 MyError = false;
1043
1044 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1045 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1046 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1047
1048 #ifdef __DML_VBA_ALLOW_DELTA__
1049 if (!myPipe->DCCEnable) {
1050 Tr0_trips = 0.0;
1051 Tr0_trips_rounded = 0.0;
1052 }
1053 #endif
1054
1055 if (!GPUVMEnable) {
1056 Tvm_trips = 0.0;
1057 Tvm_trips_rounded = 0.0;
1058 }
1059
1060 if (GPUVMEnable) {
1061 if (GPUVMPageTableLevels >= 3) {
1062 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1063 } else {
1064 *Tno_bw = 0;
1065 }
1066 } else if (!myPipe->DCCEnable) {
1067 *Tno_bw = LineTime;
1068 } else {
1069 *Tno_bw = LineTime / 4;
1070 }
1071
1072 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1073 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1074 else
1075 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1076 /*rev 99*/
1077 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
1078 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1079 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1080 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1081 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1082
1083 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1084 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1085 Tsw_oto = Lsw_oto * LineTime;
1086
1087 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1088
1089 #ifdef __DML_VBA_DEBUG__
1090 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1091 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1092 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1093 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1094 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1095 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1096 #endif
1097
1098 if (GPUVMEnable == true)
1099 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1100 else
1101 Tvm_oto = LineTime / 4.0;
1102
1103 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1104 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1105 LineTime - Tvm_oto,
1106 LineTime / 4);
1107 } else {
1108 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1109 }
1110
1111 #ifdef __DML_VBA_DEBUG__
1112 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1113 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1114 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1115 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1116 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1117 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1118 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1119 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1120 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1121 #endif
1122
1123 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1124 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1125 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1126 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1127 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1128 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1129
1130 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1131
1132 if (prefetch_sw_bytes < dep_bytes)
1133 prefetch_sw_bytes = 2 * dep_bytes;
1134
1135 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1136 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1137 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1138 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1139 dml_print("DML: LineTime: %f\n", LineTime);
1140 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1141
1142 dml_print("DML: LineTime: %f\n", LineTime);
1143 dml_print("DML: VStartup: %d\n", VStartup);
1144 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1145 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1146 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1147 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1148 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1149 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1150 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1151 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
1152 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
1153 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
1154 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
1155
1156 *PrefetchBandwidth = 0;
1157 *DestinationLinesToRequestVMInVBlank = 0;
1158 *DestinationLinesToRequestRowInVBlank = 0;
1159 *VRatioPrefetchY = 0;
1160 *VRatioPrefetchC = 0;
1161 *RequiredPrefetchPixDataBWLuma = 0;
1162 if (dst_y_prefetch_equ > 1) {
1163 double PrefetchBandwidth1;
1164 double PrefetchBandwidth2;
1165 double PrefetchBandwidth3;
1166 double PrefetchBandwidth4;
1167
1168 if (Tpre_rounded - *Tno_bw > 0) {
1169 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1170 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1171 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1172 } else {
1173 PrefetchBandwidth1 = 0;
1174 }
1175
1176 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1177 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1178 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1179 }
1180
1181 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1182 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1183 else
1184 PrefetchBandwidth2 = 0;
1185
1186 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1187 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1188 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1189 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1190 } else {
1191 PrefetchBandwidth3 = 0;
1192 }
1193
1194 #ifdef __DML_VBA_DEBUG__
1195 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1196 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1197 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1198 #endif
1199 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1200 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1201 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1202 }
1203
1204 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1205 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1206 else
1207 PrefetchBandwidth4 = 0;
1208
1209 {
1210 bool Case1OK;
1211 bool Case2OK;
1212 bool Case3OK;
1213
1214 if (PrefetchBandwidth1 > 0) {
1215 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1216 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1217 Case1OK = true;
1218 } else {
1219 Case1OK = false;
1220 }
1221 } else {
1222 Case1OK = false;
1223 }
1224
1225 if (PrefetchBandwidth2 > 0) {
1226 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1227 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1228 Case2OK = true;
1229 } else {
1230 Case2OK = false;
1231 }
1232 } else {
1233 Case2OK = false;
1234 }
1235
1236 if (PrefetchBandwidth3 > 0) {
1237 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1238 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1239 Case3OK = true;
1240 } else {
1241 Case3OK = false;
1242 }
1243 } else {
1244 Case3OK = false;
1245 }
1246
1247 if (Case1OK) {
1248 prefetch_bw_equ = PrefetchBandwidth1;
1249 } else if (Case2OK) {
1250 prefetch_bw_equ = PrefetchBandwidth2;
1251 } else if (Case3OK) {
1252 prefetch_bw_equ = PrefetchBandwidth3;
1253 } else {
1254 prefetch_bw_equ = PrefetchBandwidth4;
1255 }
1256
1257 #ifdef __DML_VBA_DEBUG__
1258 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1259 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1260 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1261 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1262 #endif
1263
1264 if (prefetch_bw_equ > 0) {
1265 if (GPUVMEnable == true) {
1266 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1267 } else {
1268 Tvm_equ = LineTime / 4;
1269 }
1270
1271 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1272 Tr0_equ = dml_max4(
1273 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1274 Tr0_trips,
1275 (LineTime - Tvm_equ) / 2,
1276 LineTime / 4);
1277 } else {
1278 Tr0_equ = (LineTime - Tvm_equ) / 2;
1279 }
1280 } else {
1281 Tvm_equ = 0;
1282 Tr0_equ = 0;
1283 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1284 }
1285 }
1286
1287 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1288 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1289 TimeForFetchingMetaPTE = Tvm_oto;
1290 TimeForFetchingRowInVBlank = Tr0_oto;
1291 *PrefetchBandwidth = prefetch_bw_oto;
1292 } else {
1293 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1294 TimeForFetchingMetaPTE = Tvm_equ;
1295 TimeForFetchingRowInVBlank = Tr0_equ;
1296 *PrefetchBandwidth = prefetch_bw_equ;
1297 }
1298
1299 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1300
1301 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1302
1303 #ifdef __DML_VBA_ALLOW_DELTA__
1304 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1305 // See note above dated 5/30/2018
1306 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1307 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1308 #else
1309 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1310 #endif
1311
1312 #ifdef __DML_VBA_DEBUG__
1313 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1314 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1315 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1316 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1317 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1318 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1319 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1320 #endif
1321
1322 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1323
1324 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1325 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1326 #ifdef __DML_VBA_DEBUG__
1327 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1328 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1329 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1330 #endif
1331 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1332 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1333 *VRatioPrefetchY = dml_max(
1334 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1335 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1336 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1337 } else {
1338 MyError = true;
1339 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1340 *VRatioPrefetchY = 0;
1341 }
1342 #ifdef __DML_VBA_DEBUG__
1343 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1344 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1345 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1346 #endif
1347 }
1348
1349 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1350 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1351
1352 #ifdef __DML_VBA_DEBUG__
1353 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1354 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1355 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1356 #endif
1357 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1358 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1359 *VRatioPrefetchC = dml_max(
1360 *VRatioPrefetchC,
1361 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1362 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1363 } else {
1364 MyError = true;
1365 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1366 *VRatioPrefetchC = 0;
1367 }
1368 #ifdef __DML_VBA_DEBUG__
1369 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1370 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1371 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1372 #endif
1373 }
1374
1375 #ifdef __DML_VBA_DEBUG__
1376 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1377 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1378 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1379 #endif
1380
1381 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1382
1383 #ifdef __DML_VBA_DEBUG__
1384 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1385 #endif
1386
1387 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1388 / LineTime;
1389 } else {
1390 MyError = true;
1391 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1392 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1393 *VRatioPrefetchY = 0;
1394 *VRatioPrefetchC = 0;
1395 *RequiredPrefetchPixDataBWLuma = 0;
1396 *RequiredPrefetchPixDataBWChroma = 0;
1397 }
1398
1399 dml_print(
1400 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1401 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1402 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1403 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1404 dml_print(
1405 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1406 (double) LinesToRequestPrefetchPixelData * LineTime);
1407 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1408 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1409 dml_print(
1410 "DML: Tslack(pre): %fus - time left over in schedule\n",
1411 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1412 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1413 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1414
1415 } else {
1416 MyError = true;
1417 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1418 }
1419
1420 {
1421 double prefetch_vm_bw;
1422 double prefetch_row_bw;
1423
1424 if (PDEAndMetaPTEBytesFrame == 0) {
1425 prefetch_vm_bw = 0;
1426 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1427 #ifdef __DML_VBA_DEBUG__
1428 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1429 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1430 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1431 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1432 #endif
1433 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1434 #ifdef __DML_VBA_DEBUG__
1435 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1436 #endif
1437 } else {
1438 prefetch_vm_bw = 0;
1439 MyError = true;
1440 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1441 }
1442
1443 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1444 prefetch_row_bw = 0;
1445 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1446 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1447
1448 #ifdef __DML_VBA_DEBUG__
1449 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1450 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1451 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1452 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1453 #endif
1454 } else {
1455 prefetch_row_bw = 0;
1456 MyError = true;
1457 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1458 }
1459
1460 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1461 }
1462
1463 if (MyError) {
1464 *PrefetchBandwidth = 0;
1465 TimeForFetchingMetaPTE = 0;
1466 TimeForFetchingRowInVBlank = 0;
1467 *DestinationLinesToRequestVMInVBlank = 0;
1468 *DestinationLinesToRequestRowInVBlank = 0;
1469 *DestinationLinesForPrefetch = 0;
1470 LinesToRequestPrefetchPixelData = 0;
1471 *VRatioPrefetchY = 0;
1472 *VRatioPrefetchC = 0;
1473 *RequiredPrefetchPixDataBWLuma = 0;
1474 *RequiredPrefetchPixDataBWChroma = 0;
1475 }
1476
1477 return MyError;
1478 }
1479
/*
 * Snap Clock onto the grid of values (4 * VCOSpeed) / k.
 *
 * The divider k is obtained by passing (4 * VCOSpeed) / Clock through
 * dml_floor() with a granularity of 1, and the result is 4 * VCOSpeed
 * divided by that k.
 *
 * NOTE(review): presumably dml_floor(x, 1) rounds x down to a whole number,
 * which makes the result >= Clock for positive inputs - confirm against the
 * helper's definition. Under the same assumption a Clock above 4 * VCOSpeed
 * yields k == 0 and a division by zero; callers appear to be expected to
 * keep Clock in range.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
1484
/*
 * Snap Clock onto the grid of values (4 * VCOSpeed) / k.
 *
 * The divider k is obtained by passing (4 * VCOSpeed) / Clock through
 * dml_ceil() with a granularity of 1, and the result is 4 * VCOSpeed
 * divided by that k.
 *
 * NOTE(review): presumably dml_ceil(x, 1) rounds x up to a whole number,
 * which makes the result <= Clock for positive inputs - confirm against the
 * helper's definition.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return vco_times_four / divider;
}
1489
1490 static void CalculateDCCConfiguration(
1491 bool DCCEnabled,
1492 bool DCCProgrammingAssumesScanDirectionUnknown,
1493 enum source_format_class SourcePixelFormat,
1494 unsigned int SurfaceWidthLuma,
1495 unsigned int SurfaceWidthChroma,
1496 unsigned int SurfaceHeightLuma,
1497 unsigned int SurfaceHeightChroma,
1498 double DETBufferSize,
1499 unsigned int RequestHeight256ByteLuma,
1500 unsigned int RequestHeight256ByteChroma,
1501 enum dm_swizzle_mode TilingFormat,
1502 unsigned int BytePerPixelY,
1503 unsigned int BytePerPixelC,
1504 double BytePerPixelDETY,
1505 double BytePerPixelDETC,
1506 enum scan_direction_class ScanOrientation,
1507 unsigned int *MaxUncompressedBlockLuma,
1508 unsigned int *MaxUncompressedBlockChroma,
1509 unsigned int *MaxCompressedBlockLuma,
1510 unsigned int *MaxCompressedBlockChroma,
1511 unsigned int *IndependentBlockLuma,
1512 unsigned int *IndependentBlockChroma)
1513 {
1514 int yuv420;
1515 int horz_div_l;
1516 int horz_div_c;
1517 int vert_div_l;
1518 int vert_div_c;
1519
1520 int swath_buf_size;
1521 double detile_buf_vp_horz_limit;
1522 double detile_buf_vp_vert_limit;
1523
1524 int MAS_vp_horz_limit;
1525 int MAS_vp_vert_limit;
1526 int max_vp_horz_width;
1527 int max_vp_vert_height;
1528 int eff_surf_width_l;
1529 int eff_surf_width_c;
1530 int eff_surf_height_l;
1531 int eff_surf_height_c;
1532
1533 int full_swath_bytes_horz_wc_l;
1534 int full_swath_bytes_horz_wc_c;
1535 int full_swath_bytes_vert_wc_l;
1536 int full_swath_bytes_vert_wc_c;
1537 int req128_horz_wc_l;
1538 int req128_horz_wc_c;
1539 int req128_vert_wc_l;
1540 int req128_vert_wc_c;
1541 int segment_order_horz_contiguous_luma;
1542 int segment_order_horz_contiguous_chroma;
1543 int segment_order_vert_contiguous_luma;
1544 int segment_order_vert_contiguous_chroma;
1545
1546 typedef enum {
1547 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1548 } RequestType;
1549 RequestType RequestLuma;
1550 RequestType RequestChroma;
1551
1552 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1553 horz_div_l = 1;
1554 horz_div_c = 1;
1555 vert_div_l = 1;
1556 vert_div_c = 1;
1557
1558 if (BytePerPixelY == 1)
1559 vert_div_l = 0;
1560 if (BytePerPixelC == 1)
1561 vert_div_c = 0;
1562 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1563 horz_div_l = 0;
1564 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1565 horz_div_c = 0;
1566
1567 if (BytePerPixelC == 0) {
1568 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1569 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1570 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1571 } else {
1572 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1573 detile_buf_vp_horz_limit = (double) swath_buf_size
1574 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1575 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1576 detile_buf_vp_vert_limit = (double) swath_buf_size
1577 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1578 }
1579
1580 if (SourcePixelFormat == dm_420_10) {
1581 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1582 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1583 }
1584
1585 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1586 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1587
1588 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1589 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1590 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1591 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1592 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1593 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1594 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1595 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1596
1597 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1598 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1599 if (BytePerPixelC > 0) {
1600 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1601 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1602 } else {
1603 full_swath_bytes_horz_wc_c = 0;
1604 full_swath_bytes_vert_wc_c = 0;
1605 }
1606
1607 if (SourcePixelFormat == dm_420_10) {
1608 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1609 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1610 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1611 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1612 }
1613
1614 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1615 req128_horz_wc_l = 0;
1616 req128_horz_wc_c = 0;
1617 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1618 req128_horz_wc_l = 0;
1619 req128_horz_wc_c = 1;
1620 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1621 req128_horz_wc_l = 1;
1622 req128_horz_wc_c = 0;
1623 } else {
1624 req128_horz_wc_l = 1;
1625 req128_horz_wc_c = 1;
1626 }
1627
1628 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1629 req128_vert_wc_l = 0;
1630 req128_vert_wc_c = 0;
1631 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1632 req128_vert_wc_l = 0;
1633 req128_vert_wc_c = 1;
1634 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1635 req128_vert_wc_l = 1;
1636 req128_vert_wc_c = 0;
1637 } else {
1638 req128_vert_wc_l = 1;
1639 req128_vert_wc_c = 1;
1640 }
1641
1642 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1643 segment_order_horz_contiguous_luma = 0;
1644 } else {
1645 segment_order_horz_contiguous_luma = 1;
1646 }
1647 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1648 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1649 segment_order_vert_contiguous_luma = 0;
1650 } else {
1651 segment_order_vert_contiguous_luma = 1;
1652 }
1653 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1654 segment_order_horz_contiguous_chroma = 0;
1655 } else {
1656 segment_order_horz_contiguous_chroma = 1;
1657 }
1658 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1659 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1660 segment_order_vert_contiguous_chroma = 0;
1661 } else {
1662 segment_order_vert_contiguous_chroma = 1;
1663 }
1664
1665 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1666 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1667 RequestLuma = REQ_256Bytes;
1668 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1669 RequestLuma = REQ_128BytesNonContiguous;
1670 } else {
1671 RequestLuma = REQ_128BytesContiguous;
1672 }
1673 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1674 RequestChroma = REQ_256Bytes;
1675 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1676 RequestChroma = REQ_128BytesNonContiguous;
1677 } else {
1678 RequestChroma = REQ_128BytesContiguous;
1679 }
1680 } else if (ScanOrientation != dm_vert) {
1681 if (req128_horz_wc_l == 0) {
1682 RequestLuma = REQ_256Bytes;
1683 } else if (segment_order_horz_contiguous_luma == 0) {
1684 RequestLuma = REQ_128BytesNonContiguous;
1685 } else {
1686 RequestLuma = REQ_128BytesContiguous;
1687 }
1688 if (req128_horz_wc_c == 0) {
1689 RequestChroma = REQ_256Bytes;
1690 } else if (segment_order_horz_contiguous_chroma == 0) {
1691 RequestChroma = REQ_128BytesNonContiguous;
1692 } else {
1693 RequestChroma = REQ_128BytesContiguous;
1694 }
1695 } else {
1696 if (req128_vert_wc_l == 0) {
1697 RequestLuma = REQ_256Bytes;
1698 } else if (segment_order_vert_contiguous_luma == 0) {
1699 RequestLuma = REQ_128BytesNonContiguous;
1700 } else {
1701 RequestLuma = REQ_128BytesContiguous;
1702 }
1703 if (req128_vert_wc_c == 0) {
1704 RequestChroma = REQ_256Bytes;
1705 } else if (segment_order_vert_contiguous_chroma == 0) {
1706 RequestChroma = REQ_128BytesNonContiguous;
1707 } else {
1708 RequestChroma = REQ_128BytesContiguous;
1709 }
1710 }
1711
1712 if (RequestLuma == REQ_256Bytes) {
1713 *MaxUncompressedBlockLuma = 256;
1714 *MaxCompressedBlockLuma = 256;
1715 *IndependentBlockLuma = 0;
1716 } else if (RequestLuma == REQ_128BytesContiguous) {
1717 *MaxUncompressedBlockLuma = 256;
1718 *MaxCompressedBlockLuma = 128;
1719 *IndependentBlockLuma = 128;
1720 } else {
1721 *MaxUncompressedBlockLuma = 256;
1722 *MaxCompressedBlockLuma = 64;
1723 *IndependentBlockLuma = 64;
1724 }
1725
1726 if (RequestChroma == REQ_256Bytes) {
1727 *MaxUncompressedBlockChroma = 256;
1728 *MaxCompressedBlockChroma = 256;
1729 *IndependentBlockChroma = 0;
1730 } else if (RequestChroma == REQ_128BytesContiguous) {
1731 *MaxUncompressedBlockChroma = 256;
1732 *MaxCompressedBlockChroma = 128;
1733 *IndependentBlockChroma = 128;
1734 } else {
1735 *MaxUncompressedBlockChroma = 256;
1736 *MaxCompressedBlockChroma = 64;
1737 *IndependentBlockChroma = 64;
1738 }
1739
1740 if (DCCEnabled != true || BytePerPixelC == 0) {
1741 *MaxUncompressedBlockChroma = 0;
1742 *MaxCompressedBlockChroma = 0;
1743 *IndependentBlockChroma = 0;
1744 }
1745
1746 if (DCCEnabled != true) {
1747 *MaxUncompressedBlockLuma = 0;
1748 *MaxCompressedBlockLuma = 0;
1749 *IndependentBlockLuma = 0;
1750 }
1751 }
1752
1753 static double CalculatePrefetchSourceLines(
1754 struct display_mode_lib *mode_lib,
1755 double VRatio,
1756 double vtaps,
1757 bool Interlace,
1758 bool ProgressiveToInterlaceUnitInOPP,
1759 unsigned int SwathHeight,
1760 unsigned int ViewportYStart,
1761 double *VInitPreFill,
1762 unsigned int *MaxNumSwath)
1763 {
1764 struct vba_vars_st *v = &mode_lib->vba;
1765 unsigned int MaxPartialSwath;
1766
1767 if (ProgressiveToInterlaceUnitInOPP)
1768 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1769 else
1770 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1771
1772 if (!v->IgnoreViewportPositioning) {
1773
1774 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1775
1776 if (*VInitPreFill > 1.0)
1777 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1778 else
1779 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1780 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1781
1782 } else {
1783
1784 if (ViewportYStart != 0)
1785 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1786
1787 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1788
1789 if (*VInitPreFill > 1.0)
1790 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1791 else
1792 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1793 }
1794
1795 #ifdef __DML_VBA_DEBUG__
1796 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1797 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1798 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1799 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1800 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1801 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1802 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1803 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1804 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1805 #endif
1806 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1807 }
1808
1809 static unsigned int CalculateVMAndRowBytes(
1810 struct display_mode_lib *mode_lib,
1811 bool DCCEnable,
1812 unsigned int BlockHeight256Bytes,
1813 unsigned int BlockWidth256Bytes,
1814 enum source_format_class SourcePixelFormat,
1815 unsigned int SurfaceTiling,
1816 unsigned int BytePerPixel,
1817 enum scan_direction_class ScanDirection,
1818 unsigned int SwathWidth,
1819 unsigned int ViewportHeight,
1820 bool GPUVMEnable,
1821 bool HostVMEnable,
1822 unsigned int HostVMMaxNonCachedPageTableLevels,
1823 unsigned int GPUVMMinPageSize,
1824 unsigned int HostVMMinPageSize,
1825 unsigned int PTEBufferSizeInRequests,
1826 unsigned int Pitch,
1827 unsigned int DCCMetaPitch,
1828 unsigned int *MacroTileWidth,
1829 unsigned int *MetaRowByte,
1830 unsigned int *PixelPTEBytesPerRow,
1831 bool *PTEBufferSizeNotExceeded,
1832 int *dpte_row_width_ub,
1833 unsigned int *dpte_row_height,
1834 unsigned int *MetaRequestWidth,
1835 unsigned int *MetaRequestHeight,
1836 unsigned int *meta_row_width,
1837 unsigned int *meta_row_height,
1838 int *vm_group_bytes,
1839 unsigned int *dpte_group_bytes,
1840 unsigned int *PixelPTEReqWidth,
1841 unsigned int *PixelPTEReqHeight,
1842 unsigned int *PTERequestSize,
1843 int *DPDE0BytesFrame,
1844 int *MetaPTEBytesFrame)
1845 {
1846 struct vba_vars_st *v = &mode_lib->vba;
1847 unsigned int MPDEBytesFrame;
1848 unsigned int DCCMetaSurfaceBytes;
1849 unsigned int MacroTileSizeBytes;
1850 unsigned int MacroTileHeight;
1851 unsigned int ExtraDPDEBytesFrame;
1852 unsigned int PDEAndMetaPTEBytesFrame;
1853 unsigned int PixelPTEReqHeightPTEs = 0;
1854 unsigned int HostVMDynamicLevels = 0;
1855 double FractionOfPTEReturnDrop;
1856
1857 if (GPUVMEnable == true && HostVMEnable == true) {
1858 if (HostVMMinPageSize < 2048) {
1859 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1860 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1861 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1862 } else {
1863 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1864 }
1865 }
1866
1867 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1868 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1869 if (ScanDirection != dm_vert) {
1870 *meta_row_height = *MetaRequestHeight;
1871 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1872 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1873 } else {
1874 *meta_row_height = *MetaRequestWidth;
1875 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1876 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1877 }
1878 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1879 if (GPUVMEnable == true) {
1880 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1881 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1882 } else {
1883 *MetaPTEBytesFrame = 0;
1884 MPDEBytesFrame = 0;
1885 }
1886
1887 if (DCCEnable != true) {
1888 *MetaPTEBytesFrame = 0;
1889 MPDEBytesFrame = 0;
1890 *MetaRowByte = 0;
1891 }
1892
1893 if (SurfaceTiling == dm_sw_linear) {
1894 MacroTileSizeBytes = 256;
1895 MacroTileHeight = BlockHeight256Bytes;
1896 } else {
1897 MacroTileSizeBytes = 65536;
1898 MacroTileHeight = 16 * BlockHeight256Bytes;
1899 }
1900 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1901
1902 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1903 if (ScanDirection != dm_vert) {
1904 *DPDE0BytesFrame = 64
1905 * (dml_ceil(
1906 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1907 / (8 * 2097152),
1908 1) + 1);
1909 } else {
1910 *DPDE0BytesFrame = 64
1911 * (dml_ceil(
1912 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1913 / (8 * 2097152),
1914 1) + 1);
1915 }
1916 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1917 } else {
1918 *DPDE0BytesFrame = 0;
1919 ExtraDPDEBytesFrame = 0;
1920 }
1921
1922 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1923
1924 #ifdef __DML_VBA_DEBUG__
1925 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1926 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1927 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1928 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1929 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1930 #endif
1931
1932 if (HostVMEnable == true) {
1933 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1934 }
1935 #ifdef __DML_VBA_DEBUG__
1936 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1937 #endif
1938
1939 if (SurfaceTiling == dm_sw_linear) {
1940 PixelPTEReqHeightPTEs = 1;
1941 *PixelPTEReqHeight = 1;
1942 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1943 *PTERequestSize = 64;
1944 FractionOfPTEReturnDrop = 0;
1945 } else if (MacroTileSizeBytes == 4096) {
1946 PixelPTEReqHeightPTEs = 1;
1947 *PixelPTEReqHeight = MacroTileHeight;
1948 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1949 *PTERequestSize = 64;
1950 if (ScanDirection != dm_vert)
1951 FractionOfPTEReturnDrop = 0;
1952 else
1953 FractionOfPTEReturnDrop = 7 / 8;
1954 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1955 PixelPTEReqHeightPTEs = 16;
1956 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1957 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1958 *PTERequestSize = 128;
1959 FractionOfPTEReturnDrop = 0;
1960 } else {
1961 PixelPTEReqHeightPTEs = 1;
1962 *PixelPTEReqHeight = MacroTileHeight;
1963 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1964 *PTERequestSize = 64;
1965 FractionOfPTEReturnDrop = 0;
1966 }
1967
1968 if (SurfaceTiling == dm_sw_linear) {
1969 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1970 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1971 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1972 } else if (ScanDirection != dm_vert) {
1973 *dpte_row_height = *PixelPTEReqHeight;
1974 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1975 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1976 } else {
1977 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1978 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1979 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1980 }
1981
1982 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1983 *PTEBufferSizeNotExceeded = true;
1984 } else {
1985 *PTEBufferSizeNotExceeded = false;
1986 }
1987
1988 if (GPUVMEnable != true) {
1989 *PixelPTEBytesPerRow = 0;
1990 *PTEBufferSizeNotExceeded = true;
1991 }
1992
1993 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1994
1995 if (HostVMEnable == true) {
1996 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1997 }
1998
1999 if (HostVMEnable == true) {
2000 *vm_group_bytes = 512;
2001 *dpte_group_bytes = 512;
2002 } else if (GPUVMEnable == true) {
2003 *vm_group_bytes = 2048;
2004 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2005 *dpte_group_bytes = 512;
2006 } else {
2007 *dpte_group_bytes = 2048;
2008 }
2009 } else {
2010 *vm_group_bytes = 0;
2011 *dpte_group_bytes = 0;
2012 }
2013 return PDEAndMetaPTEBytesFrame;
2014 }
2015
2016 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2017 {
2018 struct vba_vars_st *v = &mode_lib->vba;
2019 unsigned int j, k;
2020 double HostVMInefficiencyFactor = 1.0;
2021 bool NoChromaPlanes = true;
2022 int ReorderBytes;
2023 double VMDataOnlyReturnBW;
2024 double MaxTotalRDBandwidth = 0;
2025 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2026
2027 v->WritebackDISPCLK = 0.0;
2028 v->DISPCLKWithRamping = 0;
2029 v->DISPCLKWithoutRamping = 0;
2030 v->GlobalDPPCLK = 0.0;
2031 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2032 {
2033 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2034 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2035 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2036 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2037
2038 if (v->HostVMEnable != true) {
2039 v->ReturnBW = dml_min(
2040 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2041 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2042 } else {
2043 v->ReturnBW = dml_min(
2044 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2045 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2046 }
2047 }
2048 /* End DAL custom code */
2049
2050 // DISPCLK and DPPCLK Calculation
2051 //
2052 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2053 if (v->WritebackEnable[k]) {
2054 v->WritebackDISPCLK = dml_max(
2055 v->WritebackDISPCLK,
2056 dml314_CalculateWriteBackDISPCLK(
2057 v->WritebackPixelFormat[k],
2058 v->PixelClock[k],
2059 v->WritebackHRatio[k],
2060 v->WritebackVRatio[k],
2061 v->WritebackHTaps[k],
2062 v->WritebackVTaps[k],
2063 v->WritebackSourceWidth[k],
2064 v->WritebackDestinationWidth[k],
2065 v->HTotal[k],
2066 v->WritebackLineBufferSize));
2067 }
2068 }
2069
2070 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2071 if (v->HRatio[k] > 1) {
2072 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2073 v->MaxDCHUBToPSCLThroughput,
2074 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2075 } else {
2076 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2077 }
2078
2079 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2080 * dml_max(
2081 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2082 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2083
2084 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2085 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2086 }
2087
2088 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2089 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2090 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2091 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2092 } else {
2093 if (v->HRatioChroma[k] > 1) {
2094 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2095 v->MaxDCHUBToPSCLThroughput,
2096 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2097 } else {
2098 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2099 }
2100 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2101 * dml_max3(
2102 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2103 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2104 1.0);
2105
2106 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2107 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2108 }
2109
2110 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2111 }
2112 }
2113
2114 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2115 if (v->BlendingAndTiming[k] != k)
2116 continue;
2117 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2118 v->DISPCLKWithRamping = dml_max(
2119 v->DISPCLKWithRamping,
2120 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2121 * (1 + v->DISPCLKRampingMargin / 100));
2122 v->DISPCLKWithoutRamping = dml_max(
2123 v->DISPCLKWithoutRamping,
2124 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2125 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2126 v->DISPCLKWithRamping = dml_max(
2127 v->DISPCLKWithRamping,
2128 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2129 * (1 + v->DISPCLKRampingMargin / 100));
2130 v->DISPCLKWithoutRamping = dml_max(
2131 v->DISPCLKWithoutRamping,
2132 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2133 } else {
2134 v->DISPCLKWithRamping = dml_max(
2135 v->DISPCLKWithRamping,
2136 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2137 v->DISPCLKWithoutRamping = dml_max(
2138 v->DISPCLKWithoutRamping,
2139 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2140 }
2141 }
2142
2143 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2144 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2145
2146 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2147 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2148 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2149 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2150 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2151 v->DISPCLKDPPCLKVCOSpeed);
2152 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2153 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2154 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2155 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2156 } else {
2157 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2158 }
2159 v->DISPCLK = v->DISPCLK_calculated;
2160 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2161
2162 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2163 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2164 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2165 }
2166 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2167 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2168 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2169 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2170 }
2171
2172 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2173 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2174 }
2175
2176 // Urgent and B P-State/DRAM Clock Change Watermark
2177 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2178 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2179
2180 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2181 CalculateBytePerPixelAnd256BBlockSizes(
2182 v->SourcePixelFormat[k],
2183 v->SurfaceTiling[k],
2184 &v->BytePerPixelY[k],
2185 &v->BytePerPixelC[k],
2186 &v->BytePerPixelDETY[k],
2187 &v->BytePerPixelDETC[k],
2188 &v->BlockHeight256BytesY[k],
2189 &v->BlockHeight256BytesC[k],
2190 &v->BlockWidth256BytesY[k],
2191 &v->BlockWidth256BytesC[k]);
2192 }
2193
2194 CalculateSwathWidth(
2195 false,
2196 v->NumberOfActivePlanes,
2197 v->SourcePixelFormat,
2198 v->SourceScan,
2199 v->ViewportWidth,
2200 v->ViewportHeight,
2201 v->SurfaceWidthY,
2202 v->SurfaceWidthC,
2203 v->SurfaceHeightY,
2204 v->SurfaceHeightC,
2205 v->ODMCombineEnabled,
2206 v->BytePerPixelY,
2207 v->BytePerPixelC,
2208 v->BlockHeight256BytesY,
2209 v->BlockHeight256BytesC,
2210 v->BlockWidth256BytesY,
2211 v->BlockWidth256BytesC,
2212 v->BlendingAndTiming,
2213 v->HActive,
2214 v->HRatio,
2215 v->DPPPerPlane,
2216 v->SwathWidthSingleDPPY,
2217 v->SwathWidthSingleDPPC,
2218 v->SwathWidthY,
2219 v->SwathWidthC,
2220 v->dummyinteger3,
2221 v->dummyinteger4,
2222 v->swath_width_luma_ub,
2223 v->swath_width_chroma_ub);
2224
2225 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2226 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2227 * v->VRatio[k];
2228 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2229 * v->VRatioChroma[k];
2230 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2231 }
2232
2233 // DCFCLK Deep Sleep
2234 CalculateDCFCLKDeepSleep(
2235 mode_lib,
2236 v->NumberOfActivePlanes,
2237 v->BytePerPixelY,
2238 v->BytePerPixelC,
2239 v->VRatio,
2240 v->VRatioChroma,
2241 v->SwathWidthY,
2242 v->SwathWidthC,
2243 v->DPPPerPlane,
2244 v->HRatio,
2245 v->HRatioChroma,
2246 v->PixelClock,
2247 v->PSCL_THROUGHPUT_LUMA,
2248 v->PSCL_THROUGHPUT_CHROMA,
2249 v->DPPCLK,
2250 v->ReadBandwidthPlaneLuma,
2251 v->ReadBandwidthPlaneChroma,
2252 v->ReturnBusWidth,
2253 &v->DCFCLKDeepSleep);
2254
2255 // DSCCLK
2256 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2257 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2258 v->DSCCLK_calculated[k] = 0.0;
2259 } else {
2260 if (v->OutputFormat[k] == dm_420)
2261 v->DSCFormatFactor = 2;
2262 else if (v->OutputFormat[k] == dm_444)
2263 v->DSCFormatFactor = 1;
2264 else if (v->OutputFormat[k] == dm_n422)
2265 v->DSCFormatFactor = 2;
2266 else
2267 v->DSCFormatFactor = 1;
2268 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2269 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2270 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2271 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2272 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2273 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2274 else
2275 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2276 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2277 }
2278 }
2279
2280 // DSC Delay
2281 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2282 double BPP = v->OutputBpp[k];
2283
2284 if (v->DSCEnabled[k] && BPP != 0) {
2285 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2286 v->DSCDelay[k] = dscceComputeDelay(
2287 v->DSCInputBitPerComponent[k],
2288 BPP,
2289 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2290 v->NumberOfDSCSlices[k],
2291 v->OutputFormat[k],
2292 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2293 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2294 v->DSCDelay[k] = 2
2295 * (dscceComputeDelay(
2296 v->DSCInputBitPerComponent[k],
2297 BPP,
2298 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2299 v->NumberOfDSCSlices[k] / 2.0,
2300 v->OutputFormat[k],
2301 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2302 } else {
2303 v->DSCDelay[k] = 4
2304 * (dscceComputeDelay(
2305 v->DSCInputBitPerComponent[k],
2306 BPP,
2307 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2308 v->NumberOfDSCSlices[k] / 4.0,
2309 v->OutputFormat[k],
2310 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2311 }
2312 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2313 } else {
2314 v->DSCDelay[k] = 0;
2315 }
2316 }
2317
2318 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2319 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2320 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2321 v->DSCDelay[k] = v->DSCDelay[j];
2322
2323 // Prefetch
2324 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2325 unsigned int PDEAndMetaPTEBytesFrameY;
2326 unsigned int PixelPTEBytesPerRowY;
2327 unsigned int MetaRowByteY;
2328 unsigned int MetaRowByteC;
2329 unsigned int PDEAndMetaPTEBytesFrameC;
2330 unsigned int PixelPTEBytesPerRowC;
2331 bool PTEBufferSizeNotExceededY;
2332 bool PTEBufferSizeNotExceededC;
2333
2334 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2335 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2336 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2337 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2338 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2339 } else {
2340 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2341 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2342 }
2343
2344 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2345 mode_lib,
2346 v->DCCEnable[k],
2347 v->BlockHeight256BytesC[k],
2348 v->BlockWidth256BytesC[k],
2349 v->SourcePixelFormat[k],
2350 v->SurfaceTiling[k],
2351 v->BytePerPixelC[k],
2352 v->SourceScan[k],
2353 v->SwathWidthC[k],
2354 v->ViewportHeightChroma[k],
2355 v->GPUVMEnable,
2356 v->HostVMEnable,
2357 v->HostVMMaxNonCachedPageTableLevels,
2358 v->GPUVMMinPageSize,
2359 v->HostVMMinPageSize,
2360 v->PTEBufferSizeInRequestsForChroma,
2361 v->PitchC[k],
2362 v->DCCMetaPitchC[k],
2363 &v->MacroTileWidthC[k],
2364 &MetaRowByteC,
2365 &PixelPTEBytesPerRowC,
2366 &PTEBufferSizeNotExceededC,
2367 &v->dpte_row_width_chroma_ub[k],
2368 &v->dpte_row_height_chroma[k],
2369 &v->meta_req_width_chroma[k],
2370 &v->meta_req_height_chroma[k],
2371 &v->meta_row_width_chroma[k],
2372 &v->meta_row_height_chroma[k],
2373 &v->dummyinteger1,
2374 &v->dummyinteger2,
2375 &v->PixelPTEReqWidthC[k],
2376 &v->PixelPTEReqHeightC[k],
2377 &v->PTERequestSizeC[k],
2378 &v->dpde0_bytes_per_frame_ub_c[k],
2379 &v->meta_pte_bytes_per_frame_ub_c[k]);
2380
2381 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2382 mode_lib,
2383 v->VRatioChroma[k],
2384 v->VTAPsChroma[k],
2385 v->Interlace[k],
2386 v->ProgressiveToInterlaceUnitInOPP,
2387 v->SwathHeightC[k],
2388 v->ViewportYStartC[k],
2389 &v->VInitPreFillC[k],
2390 &v->MaxNumSwathC[k]);
2391 } else {
2392 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2393 v->PTEBufferSizeInRequestsForChroma = 0;
2394 PixelPTEBytesPerRowC = 0;
2395 PDEAndMetaPTEBytesFrameC = 0;
2396 MetaRowByteC = 0;
2397 v->MaxNumSwathC[k] = 0;
2398 v->PrefetchSourceLinesC[k] = 0;
2399 }
2400
2401 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2402 mode_lib,
2403 v->DCCEnable[k],
2404 v->BlockHeight256BytesY[k],
2405 v->BlockWidth256BytesY[k],
2406 v->SourcePixelFormat[k],
2407 v->SurfaceTiling[k],
2408 v->BytePerPixelY[k],
2409 v->SourceScan[k],
2410 v->SwathWidthY[k],
2411 v->ViewportHeight[k],
2412 v->GPUVMEnable,
2413 v->HostVMEnable,
2414 v->HostVMMaxNonCachedPageTableLevels,
2415 v->GPUVMMinPageSize,
2416 v->HostVMMinPageSize,
2417 v->PTEBufferSizeInRequestsForLuma,
2418 v->PitchY[k],
2419 v->DCCMetaPitchY[k],
2420 &v->MacroTileWidthY[k],
2421 &MetaRowByteY,
2422 &PixelPTEBytesPerRowY,
2423 &PTEBufferSizeNotExceededY,
2424 &v->dpte_row_width_luma_ub[k],
2425 &v->dpte_row_height[k],
2426 &v->meta_req_width[k],
2427 &v->meta_req_height[k],
2428 &v->meta_row_width[k],
2429 &v->meta_row_height[k],
2430 &v->vm_group_bytes[k],
2431 &v->dpte_group_bytes[k],
2432 &v->PixelPTEReqWidthY[k],
2433 &v->PixelPTEReqHeightY[k],
2434 &v->PTERequestSizeY[k],
2435 &v->dpde0_bytes_per_frame_ub_l[k],
2436 &v->meta_pte_bytes_per_frame_ub_l[k]);
2437
2438 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2439 mode_lib,
2440 v->VRatio[k],
2441 v->vtaps[k],
2442 v->Interlace[k],
2443 v->ProgressiveToInterlaceUnitInOPP,
2444 v->SwathHeightY[k],
2445 v->ViewportYStartY[k],
2446 &v->VInitPreFillY[k],
2447 &v->MaxNumSwathY[k]);
2448 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2449 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2450 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2451
2452 CalculateRowBandwidth(
2453 v->GPUVMEnable,
2454 v->SourcePixelFormat[k],
2455 v->VRatio[k],
2456 v->VRatioChroma[k],
2457 v->DCCEnable[k],
2458 v->HTotal[k] / v->PixelClock[k],
2459 MetaRowByteY,
2460 MetaRowByteC,
2461 v->meta_row_height[k],
2462 v->meta_row_height_chroma[k],
2463 PixelPTEBytesPerRowY,
2464 PixelPTEBytesPerRowC,
2465 v->dpte_row_height[k],
2466 v->dpte_row_height_chroma[k],
2467 &v->meta_row_bw[k],
2468 &v->dpte_row_bw[k]);
2469 }
2470
2471 v->TotalDCCActiveDPP = 0;
2472 v->TotalActiveDPP = 0;
2473 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2474 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2475 if (v->DCCEnable[k])
2476 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2477 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2478 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2479 NoChromaPlanes = false;
2480 }
2481
2482 ReorderBytes = v->NumberOfChannels
2483 * dml_max3(
2484 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2485 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2486 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2487
2488 VMDataOnlyReturnBW = dml_min(
2489 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2490 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2491 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2492 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2493
2494 #ifdef __DML_VBA_DEBUG__
2495 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2496 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2497 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2498 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2499 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2500 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2501 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2502 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2503 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2504 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2505 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2506 #endif
2507
2508 if (v->GPUVMEnable && v->HostVMEnable)
2509 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2510
2511 v->UrgentExtraLatency = CalculateExtraLatency(
2512 v->RoundTripPingLatencyCycles,
2513 ReorderBytes,
2514 v->DCFCLK,
2515 v->TotalActiveDPP,
2516 v->PixelChunkSizeInKByte,
2517 v->TotalDCCActiveDPP,
2518 v->MetaChunkSize,
2519 v->ReturnBW,
2520 v->GPUVMEnable,
2521 v->HostVMEnable,
2522 v->NumberOfActivePlanes,
2523 v->DPPPerPlane,
2524 v->dpte_group_bytes,
2525 HostVMInefficiencyFactor,
2526 v->HostVMMinPageSize,
2527 v->HostVMMaxNonCachedPageTableLevels);
2528
2529 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2530
2531 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2532 if (v->BlendingAndTiming[k] == k) {
2533 if (v->WritebackEnable[k] == true) {
2534 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2535 + CalculateWriteBackDelay(
2536 v->WritebackPixelFormat[k],
2537 v->WritebackHRatio[k],
2538 v->WritebackVRatio[k],
2539 v->WritebackVTaps[k],
2540 v->WritebackDestinationWidth[k],
2541 v->WritebackDestinationHeight[k],
2542 v->WritebackSourceHeight[k],
2543 v->HTotal[k]) / v->DISPCLK;
2544 } else
2545 v->WritebackDelay[v->VoltageLevel][k] = 0;
2546 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2547 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2548 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2549 v->WritebackDelay[v->VoltageLevel][k],
2550 v->WritebackLatency
2551 + CalculateWriteBackDelay(
2552 v->WritebackPixelFormat[j],
2553 v->WritebackHRatio[j],
2554 v->WritebackVRatio[j],
2555 v->WritebackVTaps[j],
2556 v->WritebackDestinationWidth[j],
2557 v->WritebackDestinationHeight[j],
2558 v->WritebackSourceHeight[j],
2559 v->HTotal[k]) / v->DISPCLK);
2560 }
2561 }
2562 }
2563 }
2564
2565 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2566 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2567 if (v->BlendingAndTiming[k] == j)
2568 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2569
2570 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2571 v->MaxVStartupLines[k] =
2572 CalculateMaxVStartup(
2573 v->VTotal[k],
2574 v->VActive[k],
2575 v->VBlankNom[k],
2576 v->HTotal[k],
2577 v->PixelClock[k],
2578 v->ProgressiveToInterlaceUnitInOPP,
2579 v->Interlace[k],
2580 v->ip.VBlankNomDefaultUS,
2581 v->WritebackDelay[v->VoltageLevel][k]);
2582
2583 #ifdef __DML_VBA_DEBUG__
2584 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2585 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2586 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2587 #endif
2588 }
2589
2590 v->MaximumMaxVStartupLines = 0;
2591 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2592 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2593
2594 // VBA_DELTA
2595 // We don't really care to iterate between the various prefetch modes
2596 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2597
2598 v->UrgentLatency = CalculateUrgentLatency(
2599 v->UrgentLatencyPixelDataOnly,
2600 v->UrgentLatencyPixelMixedWithVMData,
2601 v->UrgentLatencyVMDataOnly,
2602 v->DoUrgentLatencyAdjustment,
2603 v->UrgentLatencyAdjustmentFabricClockComponent,
2604 v->UrgentLatencyAdjustmentFabricClockReference,
2605 v->FabricClock);
2606
2607 v->FractionOfUrgentBandwidth = 0.0;
2608 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2609
2610 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2611
2612 do {
2613 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2614 bool DestinationLineTimesForPrefetchLessThan2 = false;
2615 bool VRatioPrefetchMoreThan4 = false;
2616 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2617
2618 MaxTotalRDBandwidth = 0;
2619
2620 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2621
2622 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2623 Pipe myPipe;
2624
2625 myPipe.DPPCLK = v->DPPCLK[k];
2626 myPipe.DISPCLK = v->DISPCLK;
2627 myPipe.PixelClock = v->PixelClock[k];
2628 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2629 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2630 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2631 myPipe.VRatio = v->VRatio[k];
2632 myPipe.VRatioChroma = v->VRatioChroma[k];
2633 myPipe.SourceScan = v->SourceScan[k];
2634 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2635 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2636 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2637 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2638 myPipe.InterlaceEnable = v->Interlace[k];
2639 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2640 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2641 myPipe.HTotal = v->HTotal[k];
2642 myPipe.DCCEnable = v->DCCEnable[k];
2643 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2644 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2645 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2646 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2647 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2648 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2649 v->ErrorResult[k] = CalculatePrefetchSchedule(
2650 mode_lib,
2651 HostVMInefficiencyFactor,
2652 &myPipe,
2653 v->DSCDelay[k],
2654 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2655 v->DPPCLKDelaySCL,
2656 v->DPPCLKDelaySCLLBOnly,
2657 v->DPPCLKDelayCNVCCursor,
2658 v->DISPCLKDelaySubtotal,
2659 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2660 v->OutputFormat[k],
2661 v->MaxInterDCNTileRepeaters,
2662 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2663 v->MaxVStartupLines[k],
2664 v->GPUVMMaxPageTableLevels,
2665 v->GPUVMEnable,
2666 v->HostVMEnable,
2667 v->HostVMMaxNonCachedPageTableLevels,
2668 v->HostVMMinPageSize,
2669 v->DynamicMetadataEnable[k],
2670 v->DynamicMetadataVMEnabled,
2671 v->DynamicMetadataLinesBeforeActiveRequired[k],
2672 v->DynamicMetadataTransmittedBytes[k],
2673 v->UrgentLatency,
2674 v->UrgentExtraLatency,
2675 v->TCalc,
2676 v->PDEAndMetaPTEBytesFrame[k],
2677 v->MetaRowByte[k],
2678 v->PixelPTEBytesPerRow[k],
2679 v->PrefetchSourceLinesY[k],
2680 v->SwathWidthY[k],
2681 v->VInitPreFillY[k],
2682 v->MaxNumSwathY[k],
2683 v->PrefetchSourceLinesC[k],
2684 v->SwathWidthC[k],
2685 v->VInitPreFillC[k],
2686 v->MaxNumSwathC[k],
2687 v->swath_width_luma_ub[k],
2688 v->swath_width_chroma_ub[k],
2689 v->SwathHeightY[k],
2690 v->SwathHeightC[k],
2691 TWait,
2692 &v->DSTXAfterScaler[k],
2693 &v->DSTYAfterScaler[k],
2694 &v->DestinationLinesForPrefetch[k],
2695 &v->PrefetchBandwidth[k],
2696 &v->DestinationLinesToRequestVMInVBlank[k],
2697 &v->DestinationLinesToRequestRowInVBlank[k],
2698 &v->VRatioPrefetchY[k],
2699 &v->VRatioPrefetchC[k],
2700 &v->RequiredPrefetchPixDataBWLuma[k],
2701 &v->RequiredPrefetchPixDataBWChroma[k],
2702 &v->NotEnoughTimeForDynamicMetadata[k],
2703 &v->Tno_bw[k],
2704 &v->prefetch_vmrow_bw[k],
2705 &v->Tdmdl_vm[k],
2706 &v->Tdmdl[k],
2707 &v->TSetup[k],
2708 &v->VUpdateOffsetPix[k],
2709 &v->VUpdateWidthPix[k],
2710 &v->VReadyOffsetPix[k]);
2711
2712 #ifdef __DML_VBA_DEBUG__
2713 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2714 #endif
2715 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2716 }
2717
2718 v->NoEnoughUrgentLatencyHiding = false;
2719 v->NoEnoughUrgentLatencyHidingPre = false;
2720
2721 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2722 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2723 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2724 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2725 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2726
2727 CalculateUrgentBurstFactor(
2728 v->swath_width_luma_ub[k],
2729 v->swath_width_chroma_ub[k],
2730 v->SwathHeightY[k],
2731 v->SwathHeightC[k],
2732 v->HTotal[k] / v->PixelClock[k],
2733 v->UrgentLatency,
2734 v->CursorBufferSize,
2735 v->CursorWidth[k][0],
2736 v->CursorBPP[k][0],
2737 v->VRatio[k],
2738 v->VRatioChroma[k],
2739 v->BytePerPixelDETY[k],
2740 v->BytePerPixelDETC[k],
2741 v->DETBufferSizeY[k],
2742 v->DETBufferSizeC[k],
2743 &v->UrgBurstFactorCursor[k],
2744 &v->UrgBurstFactorLuma[k],
2745 &v->UrgBurstFactorChroma[k],
2746 &v->NoUrgentLatencyHiding[k]);
2747
2748 CalculateUrgentBurstFactor(
2749 v->swath_width_luma_ub[k],
2750 v->swath_width_chroma_ub[k],
2751 v->SwathHeightY[k],
2752 v->SwathHeightC[k],
2753 v->HTotal[k] / v->PixelClock[k],
2754 v->UrgentLatency,
2755 v->CursorBufferSize,
2756 v->CursorWidth[k][0],
2757 v->CursorBPP[k][0],
2758 v->VRatioPrefetchY[k],
2759 v->VRatioPrefetchC[k],
2760 v->BytePerPixelDETY[k],
2761 v->BytePerPixelDETC[k],
2762 v->DETBufferSizeY[k],
2763 v->DETBufferSizeC[k],
2764 &v->UrgBurstFactorCursorPre[k],
2765 &v->UrgBurstFactorLumaPre[k],
2766 &v->UrgBurstFactorChromaPre[k],
2767 &v->NoUrgentLatencyHidingPre[k]);
2768
2769 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2770 + dml_max3(
2771 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2772 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2773 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2774 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2775 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2776 v->DPPPerPlane[k]
2777 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2778 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2779 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2780
2781 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2782 + dml_max3(
2783 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2784 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2785 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2786 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2787 + v->cursor_bw_pre[k]);
2788
2789 #ifdef __DML_VBA_DEBUG__
2790 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2791 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2792 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2793 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2794 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2795
2796 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2797 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2798
2799 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2800 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2801 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2802 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2803 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2804 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2805 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2806 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2807 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2808 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2809 #endif
2810
2811 if (v->DestinationLinesForPrefetch[k] < 2)
2812 DestinationLineTimesForPrefetchLessThan2 = true;
2813
2814 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2815 VRatioPrefetchMoreThan4 = true;
2816
2817 if (v->NoUrgentLatencyHiding[k] == true)
2818 v->NoEnoughUrgentLatencyHiding = true;
2819
2820 if (v->NoUrgentLatencyHidingPre[k] == true)
2821 v->NoEnoughUrgentLatencyHidingPre = true;
2822 }
2823
2824 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2825
2826 #ifdef __DML_VBA_DEBUG__
2827 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2828 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
2829 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
2830 #endif
2831
2832 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2833 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2834 v->PrefetchModeSupported = true;
2835 else {
2836 v->PrefetchModeSupported = false;
2837 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2838 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2839 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2840 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2841 }
2842
2843 // PREVIOUS_ERROR
2844 // This error result check was done after the PrefetchModeSupported. So we will
2845 // still try to calculate flip schedule even prefetch mode not supported
2846 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2847 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2848 v->PrefetchModeSupported = false;
2849 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2850 }
2851 }
2852
2853 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2854 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2855 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2856 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2857 - dml_max(
2858 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2859 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2860 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2861 v->DPPPerPlane[k]
2862 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2863 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2864 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2865 }
2866
2867 v->TotImmediateFlipBytes = 0;
2868 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2869 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2870 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2871 }
2872 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2873 CalculateFlipSchedule(
2874 mode_lib,
2875 k,
2876 HostVMInefficiencyFactor,
2877 v->UrgentExtraLatency,
2878 v->UrgentLatency,
2879 v->PDEAndMetaPTEBytesFrame[k],
2880 v->MetaRowByte[k],
2881 v->PixelPTEBytesPerRow[k]);
2882 }
2883
2884 v->total_dcn_read_bw_with_flip = 0.0;
2885 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2886 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2887 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2888 + dml_max3(
2889 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2890 v->DPPPerPlane[k] * v->final_flip_bw[k]
2891 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2892 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2893 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2894 v->DPPPerPlane[k]
2895 * (v->final_flip_bw[k]
2896 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2897 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2898 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2899 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2900 + dml_max3(
2901 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2902 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2903 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2904 v->DPPPerPlane[k]
2905 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2906 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2907 }
2908 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2909
2910 v->ImmediateFlipSupported = true;
2911 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2912 #ifdef __DML_VBA_DEBUG__
2913 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2914 #endif
2915 v->ImmediateFlipSupported = false;
2916 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2917 }
2918 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2919 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2920 #ifdef __DML_VBA_DEBUG__
2921 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
2922 #endif
2923 v->ImmediateFlipSupported = false;
2924 }
2925 }
2926 } else {
2927 v->ImmediateFlipSupported = false;
2928 }
2929
2930 v->PrefetchAndImmediateFlipSupported =
2931 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2932 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2933 v->ImmediateFlipSupported)) ? true : false;
2934 #ifdef __DML_VBA_DEBUG__
2935 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2936 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2937 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2938 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2939 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2940 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2941 #endif
2942 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2943
2944 v->VStartupLines = v->VStartupLines + 1;
2945 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2946 ASSERT(v->PrefetchAndImmediateFlipSupported);
2947
2948 // Unbounded Request Enabled
2949 CalculateUnboundedRequestAndCompressedBufferSize(
2950 v->DETBufferSizeInKByte[0],
2951 v->ConfigReturnBufferSizeInKByte,
2952 v->UseUnboundedRequesting,
2953 v->TotalActiveDPP,
2954 NoChromaPlanes,
2955 v->MaxNumDPP,
2956 v->CompressedBufferSegmentSizeInkByte,
2957 v->Output,
2958 &v->UnboundedRequestEnabled,
2959 &v->CompressedBufferSizeInkByte);
2960
2961 //Watermarks and NB P-State/DRAM Clock Change Support
2962 {
2963 enum clock_change_support DRAMClockChangeSupport; // dummy
2964
2965 CalculateWatermarksAndDRAMSpeedChangeSupport(
2966 mode_lib,
2967 PrefetchMode,
2968 v->DCFCLK,
2969 v->ReturnBW,
2970 v->UrgentLatency,
2971 v->UrgentExtraLatency,
2972 v->SOCCLK,
2973 v->DCFCLKDeepSleep,
2974 v->DETBufferSizeY,
2975 v->DETBufferSizeC,
2976 v->SwathHeightY,
2977 v->SwathHeightC,
2978 v->SwathWidthY,
2979 v->SwathWidthC,
2980 v->DPPPerPlane,
2981 v->BytePerPixelDETY,
2982 v->BytePerPixelDETC,
2983 v->UnboundedRequestEnabled,
2984 v->CompressedBufferSizeInkByte,
2985 &DRAMClockChangeSupport,
2986 &v->StutterExitWatermark,
2987 &v->StutterEnterPlusExitWatermark,
2988 &v->Z8StutterExitWatermark,
2989 &v->Z8StutterEnterPlusExitWatermark);
2990
2991 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2992 if (v->WritebackEnable[k] == true) {
2993 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2994 0,
2995 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2996 } else {
2997 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2998 }
2999 }
3000 }
3001
3002 //Display Pipeline Delivery Time in Prefetch, Groups
3003 CalculatePixelDeliveryTimes(
3004 v->NumberOfActivePlanes,
3005 v->VRatio,
3006 v->VRatioChroma,
3007 v->VRatioPrefetchY,
3008 v->VRatioPrefetchC,
3009 v->swath_width_luma_ub,
3010 v->swath_width_chroma_ub,
3011 v->DPPPerPlane,
3012 v->HRatio,
3013 v->HRatioChroma,
3014 v->PixelClock,
3015 v->PSCL_THROUGHPUT_LUMA,
3016 v->PSCL_THROUGHPUT_CHROMA,
3017 v->DPPCLK,
3018 v->BytePerPixelC,
3019 v->SourceScan,
3020 v->NumberOfCursors,
3021 v->CursorWidth,
3022 v->CursorBPP,
3023 v->BlockWidth256BytesY,
3024 v->BlockHeight256BytesY,
3025 v->BlockWidth256BytesC,
3026 v->BlockHeight256BytesC,
3027 v->DisplayPipeLineDeliveryTimeLuma,
3028 v->DisplayPipeLineDeliveryTimeChroma,
3029 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3030 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3031 v->DisplayPipeRequestDeliveryTimeLuma,
3032 v->DisplayPipeRequestDeliveryTimeChroma,
3033 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3034 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3035 v->CursorRequestDeliveryTime,
3036 v->CursorRequestDeliveryTimePrefetch);
3037
3038 CalculateMetaAndPTETimes(
3039 v->NumberOfActivePlanes,
3040 v->GPUVMEnable,
3041 v->MetaChunkSize,
3042 v->MinMetaChunkSizeBytes,
3043 v->HTotal,
3044 v->VRatio,
3045 v->VRatioChroma,
3046 v->DestinationLinesToRequestRowInVBlank,
3047 v->DestinationLinesToRequestRowInImmediateFlip,
3048 v->DCCEnable,
3049 v->PixelClock,
3050 v->BytePerPixelY,
3051 v->BytePerPixelC,
3052 v->SourceScan,
3053 v->dpte_row_height,
3054 v->dpte_row_height_chroma,
3055 v->meta_row_width,
3056 v->meta_row_width_chroma,
3057 v->meta_row_height,
3058 v->meta_row_height_chroma,
3059 v->meta_req_width,
3060 v->meta_req_width_chroma,
3061 v->meta_req_height,
3062 v->meta_req_height_chroma,
3063 v->dpte_group_bytes,
3064 v->PTERequestSizeY,
3065 v->PTERequestSizeC,
3066 v->PixelPTEReqWidthY,
3067 v->PixelPTEReqHeightY,
3068 v->PixelPTEReqWidthC,
3069 v->PixelPTEReqHeightC,
3070 v->dpte_row_width_luma_ub,
3071 v->dpte_row_width_chroma_ub,
3072 v->DST_Y_PER_PTE_ROW_NOM_L,
3073 v->DST_Y_PER_PTE_ROW_NOM_C,
3074 v->DST_Y_PER_META_ROW_NOM_L,
3075 v->DST_Y_PER_META_ROW_NOM_C,
3076 v->TimePerMetaChunkNominal,
3077 v->TimePerChromaMetaChunkNominal,
3078 v->TimePerMetaChunkVBlank,
3079 v->TimePerChromaMetaChunkVBlank,
3080 v->TimePerMetaChunkFlip,
3081 v->TimePerChromaMetaChunkFlip,
3082 v->time_per_pte_group_nom_luma,
3083 v->time_per_pte_group_vblank_luma,
3084 v->time_per_pte_group_flip_luma,
3085 v->time_per_pte_group_nom_chroma,
3086 v->time_per_pte_group_vblank_chroma,
3087 v->time_per_pte_group_flip_chroma);
3088
3089 CalculateVMGroupAndRequestTimes(
3090 v->NumberOfActivePlanes,
3091 v->GPUVMEnable,
3092 v->GPUVMMaxPageTableLevels,
3093 v->HTotal,
3094 v->BytePerPixelC,
3095 v->DestinationLinesToRequestVMInVBlank,
3096 v->DestinationLinesToRequestVMInImmediateFlip,
3097 v->DCCEnable,
3098 v->PixelClock,
3099 v->dpte_row_width_luma_ub,
3100 v->dpte_row_width_chroma_ub,
3101 v->vm_group_bytes,
3102 v->dpde0_bytes_per_frame_ub_l,
3103 v->dpde0_bytes_per_frame_ub_c,
3104 v->meta_pte_bytes_per_frame_ub_l,
3105 v->meta_pte_bytes_per_frame_ub_c,
3106 v->TimePerVMGroupVBlank,
3107 v->TimePerVMGroupFlip,
3108 v->TimePerVMRequestVBlank,
3109 v->TimePerVMRequestFlip);
3110
3111 // Min TTUVBlank
3112 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3113 if (PrefetchMode == 0) {
3114 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3115 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3116 v->MinTTUVBlank[k] = dml_max(
3117 v->DRAMClockChangeWatermark,
3118 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3119 } else if (PrefetchMode == 1) {
3120 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3121 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3122 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3123 } else {
3124 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3125 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3126 v->MinTTUVBlank[k] = v->UrgentWatermark;
3127 }
3128 if (!v->DynamicMetadataEnable[k])
3129 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3130 }
3131
3132 // DCC Configuration
3133 v->ActiveDPPs = 0;
3134 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3135 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3136 v->SourcePixelFormat[k],
3137 v->SurfaceWidthY[k],
3138 v->SurfaceWidthC[k],
3139 v->SurfaceHeightY[k],
3140 v->SurfaceHeightC[k],
3141 v->DETBufferSizeInKByte[0] * 1024,
3142 v->BlockHeight256BytesY[k],
3143 v->BlockHeight256BytesC[k],
3144 v->SurfaceTiling[k],
3145 v->BytePerPixelY[k],
3146 v->BytePerPixelC[k],
3147 v->BytePerPixelDETY[k],
3148 v->BytePerPixelDETC[k],
3149 v->SourceScan[k],
3150 &v->DCCYMaxUncompressedBlock[k],
3151 &v->DCCCMaxUncompressedBlock[k],
3152 &v->DCCYMaxCompressedBlock[k],
3153 &v->DCCCMaxCompressedBlock[k],
3154 &v->DCCYIndependentBlock[k],
3155 &v->DCCCIndependentBlock[k]);
3156 }
3157
3158 // VStartup Adjustment
3159 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3160 bool isInterlaceTiming;
3161 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3162 #ifdef __DML_VBA_DEBUG__
3163 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3164 #endif
3165
3166 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3167
3168 #ifdef __DML_VBA_DEBUG__
3169 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3170 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3171 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3172 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3173 #endif
3174
3175 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3176 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3177 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3178 }
3179
3180 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3181 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3182 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
3183 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
3184 } else {
3185 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
3186 }
3187 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3188 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3189 <= (isInterlaceTiming ?
3190 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3191 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3192 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3193 } else {
3194 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3195 }
3196 #ifdef __DML_VBA_DEBUG__
3197 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3198 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3199 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3200 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3201 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3202 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3203 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3204 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3205 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3206 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3207 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3208 #endif
3209 }
3210
3211 {
3212 //Maximum Bandwidth Used
3213 double TotalWRBandwidth = 0;
3214 double MaxPerPlaneVActiveWRBandwidth = 0;
3215 double WRBandwidth = 0;
3216
3217 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3218 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3219 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3220 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3221 } else if (v->WritebackEnable[k] == true) {
3222 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3223 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3224 }
3225 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3226 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3227 }
3228
3229 v->TotalDataReadBandwidth = 0;
3230 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3231 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3232 }
3233 }
3234 // Stutter Efficiency
3235 CalculateStutterEfficiency(
3236 mode_lib,
3237 v->CompressedBufferSizeInkByte,
3238 v->UnboundedRequestEnabled,
3239 v->ConfigReturnBufferSizeInKByte,
3240 v->MetaFIFOSizeInKEntries,
3241 v->ZeroSizeBufferEntries,
3242 v->NumberOfActivePlanes,
3243 v->ROBBufferSizeInKByte,
3244 v->TotalDataReadBandwidth,
3245 v->DCFCLK,
3246 v->ReturnBW,
3247 v->COMPBUF_RESERVED_SPACE_64B,
3248 v->COMPBUF_RESERVED_SPACE_ZS,
3249 v->SRExitTime,
3250 v->SRExitZ8Time,
3251 v->SynchronizedVBlank,
3252 v->StutterEnterPlusExitWatermark,
3253 v->Z8StutterEnterPlusExitWatermark,
3254 v->ProgressiveToInterlaceUnitInOPP,
3255 v->Interlace,
3256 v->MinTTUVBlank,
3257 v->DPPPerPlane,
3258 v->DETBufferSizeY,
3259 v->BytePerPixelY,
3260 v->BytePerPixelDETY,
3261 v->SwathWidthY,
3262 v->SwathHeightY,
3263 v->SwathHeightC,
3264 v->DCCRateLuma,
3265 v->DCCRateChroma,
3266 v->DCCFractionOfZeroSizeRequestsLuma,
3267 v->DCCFractionOfZeroSizeRequestsChroma,
3268 v->HTotal,
3269 v->VTotal,
3270 v->PixelClock,
3271 v->VRatio,
3272 v->SourceScan,
3273 v->BlockHeight256BytesY,
3274 v->BlockWidth256BytesY,
3275 v->BlockHeight256BytesC,
3276 v->BlockWidth256BytesC,
3277 v->DCCYMaxUncompressedBlock,
3278 v->DCCCMaxUncompressedBlock,
3279 v->VActive,
3280 v->DCCEnable,
3281 v->WritebackEnable,
3282 v->ReadBandwidthPlaneLuma,
3283 v->ReadBandwidthPlaneChroma,
3284 v->meta_row_bw,
3285 v->dpte_row_bw,
3286 &v->StutterEfficiencyNotIncludingVBlank,
3287 &v->StutterEfficiency,
3288 &v->NumberOfStutterBurstsPerFrame,
3289 &v->Z8StutterEfficiencyNotIncludingVBlank,
3290 &v->Z8StutterEfficiency,
3291 &v->Z8NumberOfStutterBurstsPerFrame,
3292 &v->StutterPeriod);
3293 }
3294
3295 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3296 {
3297 struct vba_vars_st *v = &mode_lib->vba;
3298 // Display Pipe Configuration
3299 double BytePerPixDETY[DC__NUM_DPP__MAX];
3300 double BytePerPixDETC[DC__NUM_DPP__MAX];
3301 int BytePerPixY[DC__NUM_DPP__MAX];
3302 int BytePerPixC[DC__NUM_DPP__MAX];
3303 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3304 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3305 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3306 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3307 double dummy1[DC__NUM_DPP__MAX];
3308 double dummy2[DC__NUM_DPP__MAX];
3309 double dummy3[DC__NUM_DPP__MAX];
3310 double dummy4[DC__NUM_DPP__MAX];
3311 int dummy5[DC__NUM_DPP__MAX];
3312 int dummy6[DC__NUM_DPP__MAX];
3313 bool dummy7[DC__NUM_DPP__MAX];
3314 bool dummysinglestring;
3315
3316 unsigned int k;
3317
3318 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3319
3320 CalculateBytePerPixelAnd256BBlockSizes(
3321 v->SourcePixelFormat[k],
3322 v->SurfaceTiling[k],
3323 &BytePerPixY[k],
3324 &BytePerPixC[k],
3325 &BytePerPixDETY[k],
3326 &BytePerPixDETC[k],
3327 &Read256BytesBlockHeightY[k],
3328 &Read256BytesBlockHeightC[k],
3329 &Read256BytesBlockWidthY[k],
3330 &Read256BytesBlockWidthC[k]);
3331 }
3332
3333 CalculateSwathAndDETConfiguration(
3334 false,
3335 v->NumberOfActivePlanes,
3336 v->DETBufferSizeInKByte[0],
3337 dummy1,
3338 dummy2,
3339 v->SourceScan,
3340 v->SourcePixelFormat,
3341 v->SurfaceTiling,
3342 v->ViewportWidth,
3343 v->ViewportHeight,
3344 v->SurfaceWidthY,
3345 v->SurfaceWidthC,
3346 v->SurfaceHeightY,
3347 v->SurfaceHeightC,
3348 Read256BytesBlockHeightY,
3349 Read256BytesBlockHeightC,
3350 Read256BytesBlockWidthY,
3351 Read256BytesBlockWidthC,
3352 v->ODMCombineEnabled,
3353 v->BlendingAndTiming,
3354 BytePerPixY,
3355 BytePerPixC,
3356 BytePerPixDETY,
3357 BytePerPixDETC,
3358 v->HActive,
3359 v->HRatio,
3360 v->HRatioChroma,
3361 v->DPPPerPlane,
3362 dummy5,
3363 dummy6,
3364 dummy3,
3365 dummy4,
3366 v->SwathHeightY,
3367 v->SwathHeightC,
3368 v->DETBufferSizeY,
3369 v->DETBufferSizeC,
3370 dummy7,
3371 &dummysinglestring);
3372 }
3373
3374 static bool CalculateBytePerPixelAnd256BBlockSizes(
3375 enum source_format_class SourcePixelFormat,
3376 enum dm_swizzle_mode SurfaceTiling,
3377 unsigned int *BytePerPixelY,
3378 unsigned int *BytePerPixelC,
3379 double *BytePerPixelDETY,
3380 double *BytePerPixelDETC,
3381 unsigned int *BlockHeight256BytesY,
3382 unsigned int *BlockHeight256BytesC,
3383 unsigned int *BlockWidth256BytesY,
3384 unsigned int *BlockWidth256BytesC)
3385 {
3386 if (SourcePixelFormat == dm_444_64) {
3387 *BytePerPixelDETY = 8;
3388 *BytePerPixelDETC = 0;
3389 *BytePerPixelY = 8;
3390 *BytePerPixelC = 0;
3391 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3392 *BytePerPixelDETY = 4;
3393 *BytePerPixelDETC = 0;
3394 *BytePerPixelY = 4;
3395 *BytePerPixelC = 0;
3396 } else if (SourcePixelFormat == dm_444_16) {
3397 *BytePerPixelDETY = 2;
3398 *BytePerPixelDETC = 0;
3399 *BytePerPixelY = 2;
3400 *BytePerPixelC = 0;
3401 } else if (SourcePixelFormat == dm_444_8) {
3402 *BytePerPixelDETY = 1;
3403 *BytePerPixelDETC = 0;
3404 *BytePerPixelY = 1;
3405 *BytePerPixelC = 0;
3406 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3407 *BytePerPixelDETY = 4;
3408 *BytePerPixelDETC = 1;
3409 *BytePerPixelY = 4;
3410 *BytePerPixelC = 1;
3411 } else if (SourcePixelFormat == dm_420_8) {
3412 *BytePerPixelDETY = 1;
3413 *BytePerPixelDETC = 2;
3414 *BytePerPixelY = 1;
3415 *BytePerPixelC = 2;
3416 } else if (SourcePixelFormat == dm_420_12) {
3417 *BytePerPixelDETY = 2;
3418 *BytePerPixelDETC = 4;
3419 *BytePerPixelY = 2;
3420 *BytePerPixelC = 4;
3421 } else {
3422 *BytePerPixelDETY = 4.0 / 3;
3423 *BytePerPixelDETC = 8.0 / 3;
3424 *BytePerPixelY = 2;
3425 *BytePerPixelC = 4;
3426 }
3427
3428 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3429 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3430 if (SurfaceTiling == dm_sw_linear) {
3431 *BlockHeight256BytesY = 1;
3432 } else if (SourcePixelFormat == dm_444_64) {
3433 *BlockHeight256BytesY = 4;
3434 } else if (SourcePixelFormat == dm_444_8) {
3435 *BlockHeight256BytesY = 16;
3436 } else {
3437 *BlockHeight256BytesY = 8;
3438 }
3439 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3440 *BlockHeight256BytesC = 0;
3441 *BlockWidth256BytesC = 0;
3442 } else {
3443 if (SurfaceTiling == dm_sw_linear) {
3444 *BlockHeight256BytesY = 1;
3445 *BlockHeight256BytesC = 1;
3446 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3447 *BlockHeight256BytesY = 8;
3448 *BlockHeight256BytesC = 16;
3449 } else if (SourcePixelFormat == dm_420_8) {
3450 *BlockHeight256BytesY = 16;
3451 *BlockHeight256BytesC = 8;
3452 } else {
3453 *BlockHeight256BytesY = 8;
3454 *BlockHeight256BytesC = 8;
3455 }
3456 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3457 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3458 }
3459 return true;
3460 }
3461
/*
 * CalculateTWait - pick the wait time that applies to a prefetch mode.
 *
 * Mode 0 returns the largest of (DRAMClockChangeLatency + UrgentLatency),
 * SREnterPlusExitTime and UrgentLatency.  Mode 1 drops the DRAM clock change
 * term.  Any other mode returns UrgentLatency alone.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3472
3473 double dml314_CalculateWriteBackDISPCLK(
3474 enum source_format_class WritebackPixelFormat,
3475 double PixelClock,
3476 double WritebackHRatio,
3477 double WritebackVRatio,
3478 unsigned int WritebackHTaps,
3479 unsigned int WritebackVTaps,
3480 long WritebackSourceWidth,
3481 long WritebackDestinationWidth,
3482 unsigned int HTotal,
3483 unsigned int WritebackLineBufferSize)
3484 {
3485 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3486
3487 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3488 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3489 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3490 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3491 }
3492
3493 static double CalculateWriteBackDelay(
3494 enum source_format_class WritebackPixelFormat,
3495 double WritebackHRatio,
3496 double WritebackVRatio,
3497 unsigned int WritebackVTaps,
3498 int WritebackDestinationWidth,
3499 int WritebackDestinationHeight,
3500 int WritebackSourceHeight,
3501 unsigned int HTotal)
3502 {
3503 double CalculateWriteBackDelay;
3504 double Line_length;
3505 double Output_lines_last_notclamped;
3506 double WritebackVInit;
3507
3508 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3509 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3510 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3511 if (Output_lines_last_notclamped < 0) {
3512 CalculateWriteBackDelay = 0;
3513 } else {
3514 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3515 }
3516 return CalculateWriteBackDelay;
3517 }
3518
/*
 * CalculateVupdateAndDynamicMetadataParameters - VUPDATE/VREADY pixel
 * offsets and dynamic-metadata timing terms.
 *
 * Outputs:
 *   @VUpdateWidthPix   ceil((14 / DCFClkDeepSleep + 12 / DPPCLK + repeater
 *                      delay) * PixelClock)
 *   @VReadyOffsetPix   ceil(max(150 / DPPCLK, repeater delay +
 *                      20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock)
 *   @VUpdateOffsetPix  ceil(HTotal / 4)
 *   @TSetup            sum of the three values above divided by PixelClock
 *   @Tdmbf             DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   @Tdmec             HTotal / PixelClock
 *   @Tdmsks            half of VBlank * HTotal / PixelClock when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0,
 *                      otherwise that many lines; halved again when
 *                      InterlaceEnable is 1 and ProgressiveToInterlaceUnitInOPP
 *                      is false
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* The width and ready offset are doubles and need %f; only the update offset is an int. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3562
3563 static void CalculateRowBandwidth(
3564 bool GPUVMEnable,
3565 enum source_format_class SourcePixelFormat,
3566 double VRatio,
3567 double VRatioChroma,
3568 bool DCCEnable,
3569 double LineTime,
3570 unsigned int MetaRowByteLuma,
3571 unsigned int MetaRowByteChroma,
3572 unsigned int meta_row_height_luma,
3573 unsigned int meta_row_height_chroma,
3574 unsigned int PixelPTEBytesPerRowLuma,
3575 unsigned int PixelPTEBytesPerRowChroma,
3576 unsigned int dpte_row_height_luma,
3577 unsigned int dpte_row_height_chroma,
3578 double *meta_row_bw,
3579 double *dpte_row_bw)
3580 {
3581 if (DCCEnable != true) {
3582 *meta_row_bw = 0;
3583 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3584 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3585 } else {
3586 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3587 }
3588
3589 if (GPUVMEnable != true) {
3590 *dpte_row_bw = 0;
3591 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3592 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3593 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3594 } else {
3595 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3596 }
3597 }
3598
3599 static void CalculateFlipSchedule(
3600 struct display_mode_lib *mode_lib,
3601 unsigned int k,
3602 double HostVMInefficiencyFactor,
3603 double UrgentExtraLatency,
3604 double UrgentLatency,
3605 double PDEAndMetaPTEBytesPerFrame,
3606 double MetaRowBytes,
3607 double DPTEBytesPerRow)
3608 {
3609 struct vba_vars_st *v = &mode_lib->vba;
3610 double min_row_time = 0.0;
3611 unsigned int HostVMDynamicLevelsTrips;
3612 double TimeForFetchingMetaPTEImmediateFlip;
3613 double TimeForFetchingRowInVBlankImmediateFlip;
3614 double ImmediateFlipBW;
3615 double LineTime = v->HTotal[k] / v->PixelClock[k];
3616
3617 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3618 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3619 } else {
3620 HostVMDynamicLevelsTrips = 0;
3621 }
3622
3623 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3624 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3625 }
3626
3627 if (v->GPUVMEnable == true) {
3628 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3629 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3630 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3631 LineTime / 4.0);
3632 } else {
3633 TimeForFetchingMetaPTEImmediateFlip = 0;
3634 }
3635
3636 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3637 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3638 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3639 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3640 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3641 LineTime / 4);
3642 } else {
3643 TimeForFetchingRowInVBlankImmediateFlip = 0;
3644 }
3645
3646 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3647
3648 if (v->GPUVMEnable == true) {
3649 v->final_flip_bw[k] = dml_max(
3650 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3651 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3652 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3653 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3654 } else {
3655 v->final_flip_bw[k] = 0;
3656 }
3657
3658 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3659 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3660 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3661 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3662 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3663 } else {
3664 min_row_time = dml_min4(
3665 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3666 v->meta_row_height[k] * LineTime / v->VRatio[k],
3667 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3668 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3669 }
3670 } else {
3671 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3672 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3673 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3674 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3675 } else {
3676 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3677 }
3678 }
3679
3680 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3681 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3682 v->ImmediateFlipSupportedForPipe[k] = false;
3683 } else {
3684 v->ImmediateFlipSupportedForPipe[k] = true;
3685 }
3686
3687 #ifdef __DML_VBA_DEBUG__
3688 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3689 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3690 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3691 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3692 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3693 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3694 #endif
3695
3696 }
3697
3698 static double TruncToValidBPP(
3699 double LinkBitRate,
3700 int Lanes,
3701 int HTotal,
3702 int HActive,
3703 double PixelClock,
3704 double DesiredBPP,
3705 bool DSCEnable,
3706 enum output_encoder_class Output,
3707 enum output_format_class Format,
3708 unsigned int DSCInputBitPerComponent,
3709 int DSCSlices,
3710 int AudioRate,
3711 int AudioLayout,
3712 enum odm_combine_mode ODMCombine)
3713 {
3714 double MaxLinkBPP;
3715 int MinDSCBPP;
3716 double MaxDSCBPP;
3717 int NonDSCBPP0;
3718 int NonDSCBPP1;
3719 int NonDSCBPP2;
3720
3721 if (Format == dm_420) {
3722 NonDSCBPP0 = 12;
3723 NonDSCBPP1 = 15;
3724 NonDSCBPP2 = 18;
3725 MinDSCBPP = 6;
3726 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3727 } else if (Format == dm_444) {
3728 NonDSCBPP0 = 24;
3729 NonDSCBPP1 = 30;
3730 NonDSCBPP2 = 36;
3731 MinDSCBPP = 8;
3732 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3733 } else {
3734
3735 NonDSCBPP0 = 16;
3736 NonDSCBPP1 = 20;
3737 NonDSCBPP2 = 24;
3738
3739 if (Format == dm_n422) {
3740 MinDSCBPP = 7;
3741 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3742 } else {
3743 MinDSCBPP = 8;
3744 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3745 }
3746 }
3747
3748 if (DSCEnable && Output == dm_dp) {
3749 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3750 } else {
3751 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3752 }
3753
3754 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3755 MaxLinkBPP = 16;
3756 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3757 MaxLinkBPP = 32;
3758 }
3759
3760 if (DesiredBPP == 0) {
3761 if (DSCEnable) {
3762 if (MaxLinkBPP < MinDSCBPP) {
3763 return BPP_INVALID;
3764 } else if (MaxLinkBPP >= MaxDSCBPP) {
3765 return MaxDSCBPP;
3766 } else {
3767 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3768 }
3769 } else {
3770 if (MaxLinkBPP >= NonDSCBPP2) {
3771 return NonDSCBPP2;
3772 } else if (MaxLinkBPP >= NonDSCBPP1) {
3773 return NonDSCBPP1;
3774 } else if (MaxLinkBPP >= NonDSCBPP0) {
3775 return 16.0;
3776 } else {
3777 return BPP_INVALID;
3778 }
3779 }
3780 } else {
3781 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3782 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3783 return BPP_INVALID;
3784 } else {
3785 return DesiredBPP;
3786 }
3787 }
3788 return BPP_INVALID;
3789 }
3790
3791 static noinline void CalculatePrefetchSchedulePerPlane(
3792 struct display_mode_lib *mode_lib,
3793 double HostVMInefficiencyFactor,
3794 int i,
3795 unsigned int j,
3796 unsigned int k)
3797 {
3798 struct vba_vars_st *v = &mode_lib->vba;
3799 Pipe myPipe;
3800
3801 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3802 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3803 myPipe.PixelClock = v->PixelClock[k];
3804 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3805 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3806 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3807 myPipe.VRatio = mode_lib->vba.VRatio[k];
3808 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3809
3810 myPipe.SourceScan = v->SourceScan[k];
3811 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3812 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3813 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3814 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3815 myPipe.InterlaceEnable = v->Interlace[k];
3816 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3817 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3818 myPipe.HTotal = v->HTotal[k];
3819 myPipe.DCCEnable = v->DCCEnable[k];
3820 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3821 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3822 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3823 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3824 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3825 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3826 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3827 mode_lib,
3828 HostVMInefficiencyFactor,
3829 &myPipe,
3830 v->DSCDelayPerState[i][k],
3831 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3832 v->DPPCLKDelaySCL,
3833 v->DPPCLKDelaySCLLBOnly,
3834 v->DPPCLKDelayCNVCCursor,
3835 v->DISPCLKDelaySubtotal,
3836 v->SwathWidthYThisState[k] / v->HRatio[k],
3837 v->OutputFormat[k],
3838 v->MaxInterDCNTileRepeaters,
3839 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3840 v->MaximumVStartup[i][j][k],
3841 v->GPUVMMaxPageTableLevels,
3842 v->GPUVMEnable,
3843 v->HostVMEnable,
3844 v->HostVMMaxNonCachedPageTableLevels,
3845 v->HostVMMinPageSize,
3846 v->DynamicMetadataEnable[k],
3847 v->DynamicMetadataVMEnabled,
3848 v->DynamicMetadataLinesBeforeActiveRequired[k],
3849 v->DynamicMetadataTransmittedBytes[k],
3850 v->UrgLatency[i],
3851 v->ExtraLatency,
3852 v->TimeCalc,
3853 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3854 v->MetaRowBytes[i][j][k],
3855 v->DPTEBytesPerRow[i][j][k],
3856 v->PrefetchLinesY[i][j][k],
3857 v->SwathWidthYThisState[k],
3858 v->PrefillY[k],
3859 v->MaxNumSwY[k],
3860 v->PrefetchLinesC[i][j][k],
3861 v->SwathWidthCThisState[k],
3862 v->PrefillC[k],
3863 v->MaxNumSwC[k],
3864 v->swath_width_luma_ub_this_state[k],
3865 v->swath_width_chroma_ub_this_state[k],
3866 v->SwathHeightYThisState[k],
3867 v->SwathHeightCThisState[k],
3868 v->TWait,
3869 &v->DSTXAfterScaler[k],
3870 &v->DSTYAfterScaler[k],
3871 &v->LineTimesForPrefetch[k],
3872 &v->PrefetchBW[k],
3873 &v->LinesForMetaPTE[k],
3874 &v->LinesForMetaAndDPTERow[k],
3875 &v->VRatioPreY[i][j][k],
3876 &v->VRatioPreC[i][j][k],
3877 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3878 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3879 &v->NoTimeForDynamicMetadata[i][j][k],
3880 &v->Tno_bw[k],
3881 &v->prefetch_vmrow_bw[k],
3882 &v->dummy7[k],
3883 &v->dummy8[k],
3884 &v->dummy13[k],
3885 &v->VUpdateOffsetPix[k],
3886 &v->VUpdateWidthPix[k],
3887 &v->VReadyOffsetPix[k]);
3888 }
3889
3890 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3891 {
3892 struct vba_vars_st *v = &mode_lib->vba;
3893
3894 int i, j;
3895 unsigned int k, m;
3896 int ReorderingBytes;
3897 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3898 bool NoChroma = true;
3899 bool EnoughWritebackUnits = true;
3900 bool P2IWith420 = false;
3901 bool DSCOnlyIfNecessaryWithBPP = false;
3902 bool DSC422NativeNotSupported = false;
3903 double MaxTotalVActiveRDBandwidth;
3904 bool ViewportExceedsSurface = false;
3905 bool FMTBufferExceeded = false;
3906
3907 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3908
3909 CalculateMinAndMaxPrefetchMode(
3910 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3911 &MinPrefetchMode, &MaxPrefetchMode);
3912
3913 /*Scale Ratio, taps Support Check*/
3914
3915 v->ScaleRatioAndTapsSupport = true;
3916 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3917 if (v->ScalerEnabled[k] == false
3918 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3919 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3920 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3921 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3922 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3923 v->ScaleRatioAndTapsSupport = false;
3924 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3925 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3926 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3927 || v->VRatio[k] > v->vtaps[k]
3928 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3929 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3930 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3931 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3932 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3933 || v->HRatioChroma[k] > v->MaxHSCLRatio
3934 || v->VRatioChroma[k] > v->MaxVSCLRatio
3935 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3936 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3937 v->ScaleRatioAndTapsSupport = false;
3938 }
3939 }
3940 /*Source Format, Pixel Format and Scan Support Check*/
3941
3942 v->SourceFormatPixelAndScanSupport = true;
3943 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3944 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
3945 v->SourceFormatPixelAndScanSupport = false;
3946 }
3947 }
3948 /*Bandwidth Support Check*/
3949
3950 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3951 CalculateBytePerPixelAnd256BBlockSizes(
3952 v->SourcePixelFormat[k],
3953 v->SurfaceTiling[k],
3954 &v->BytePerPixelY[k],
3955 &v->BytePerPixelC[k],
3956 &v->BytePerPixelInDETY[k],
3957 &v->BytePerPixelInDETC[k],
3958 &v->Read256BlockHeightY[k],
3959 &v->Read256BlockHeightC[k],
3960 &v->Read256BlockWidthY[k],
3961 &v->Read256BlockWidthC[k]);
3962 }
3963 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3964 if (v->SourceScan[k] != dm_vert) {
3965 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3966 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3967 } else {
3968 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3969 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3970 }
3971 }
3972 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3973 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3974 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3975 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3976 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3977 }
3978 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3979 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3980 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3981 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3982 } else if (v->WritebackEnable[k] == true) {
3983 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3984 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3985 } else {
3986 v->WriteBandwidth[k] = 0.0;
3987 }
3988 }
3989
3990 /*Writeback Latency support check*/
3991
3992 v->WritebackLatencySupport = true;
3993 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3994 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3995 v->WritebackLatencySupport = false;
3996 }
3997 }
3998
3999 /*Writeback Mode Support Check*/
4000
4001 v->TotalNumberOfActiveWriteback = 0;
4002 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4003 if (v->WritebackEnable[k] == true) {
4004 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4005 }
4006 }
4007
4008 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4009 EnoughWritebackUnits = false;
4010 }
4011
4012 /*Writeback Scale Ratio and Taps Support Check*/
4013
4014 v->WritebackScaleRatioAndTapsSupport = true;
4015 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4016 if (v->WritebackEnable[k] == true) {
4017 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4018 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4019 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4020 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4021 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4022 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4023 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4024 v->WritebackScaleRatioAndTapsSupport = false;
4025 }
4026 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4027 v->WritebackScaleRatioAndTapsSupport = false;
4028 }
4029 }
4030 }
4031 /*Maximum DISPCLK/DPPCLK Support check*/
4032
4033 v->WritebackRequiredDISPCLK = 0.0;
4034 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4035 if (v->WritebackEnable[k] == true) {
4036 v->WritebackRequiredDISPCLK = dml_max(
4037 v->WritebackRequiredDISPCLK,
4038 dml314_CalculateWriteBackDISPCLK(
4039 v->WritebackPixelFormat[k],
4040 v->PixelClock[k],
4041 v->WritebackHRatio[k],
4042 v->WritebackVRatio[k],
4043 v->WritebackHTaps[k],
4044 v->WritebackVTaps[k],
4045 v->WritebackSourceWidth[k],
4046 v->WritebackDestinationWidth[k],
4047 v->HTotal[k],
4048 v->WritebackLineBufferSize));
4049 }
4050 }
4051 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4052 if (v->HRatio[k] > 1.0) {
4053 v->PSCL_FACTOR[k] = dml_min(
4054 v->MaxDCHUBToPSCLThroughput,
4055 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4056 } else {
4057 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4058 }
4059 if (v->BytePerPixelC[k] == 0.0) {
4060 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4061 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4062 * dml_max3(
4063 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4064 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4065 1.0);
4066 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4067 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4068 }
4069 } else {
4070 if (v->HRatioChroma[k] > 1.0) {
4071 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4072 v->MaxDCHUBToPSCLThroughput,
4073 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4074 } else {
4075 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4076 }
4077 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4078 * dml_max5(
4079 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4080 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4081 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4082 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4083 1.0);
4084 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4085 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4086 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4087 }
4088 }
4089 }
4090 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4091 int MaximumSwathWidthSupportLuma;
4092 int MaximumSwathWidthSupportChroma;
4093
4094 if (v->SurfaceTiling[k] == dm_sw_linear) {
4095 MaximumSwathWidthSupportLuma = 8192.0;
4096 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4097 MaximumSwathWidthSupportLuma = 2880.0;
4098 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4099 MaximumSwathWidthSupportLuma = 3840.0;
4100 } else {
4101 MaximumSwathWidthSupportLuma = 5760.0;
4102 }
4103
4104 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4105 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4106 } else {
4107 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4108 }
4109 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4110 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4111 if (v->BytePerPixelC[k] == 0.0) {
4112 v->MaximumSwathWidthInLineBufferChroma = 0;
4113 } else {
4114 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4115 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4116 }
4117 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4118 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4119 }
4120
4121 CalculateSwathAndDETConfiguration(
4122 true,
4123 v->NumberOfActivePlanes,
4124 v->DETBufferSizeInKByte[0],
4125 v->MaximumSwathWidthLuma,
4126 v->MaximumSwathWidthChroma,
4127 v->SourceScan,
4128 v->SourcePixelFormat,
4129 v->SurfaceTiling,
4130 v->ViewportWidth,
4131 v->ViewportHeight,
4132 v->SurfaceWidthY,
4133 v->SurfaceWidthC,
4134 v->SurfaceHeightY,
4135 v->SurfaceHeightC,
4136 v->Read256BlockHeightY,
4137 v->Read256BlockHeightC,
4138 v->Read256BlockWidthY,
4139 v->Read256BlockWidthC,
4140 v->odm_combine_dummy,
4141 v->BlendingAndTiming,
4142 v->BytePerPixelY,
4143 v->BytePerPixelC,
4144 v->BytePerPixelInDETY,
4145 v->BytePerPixelInDETC,
4146 v->HActive,
4147 v->HRatio,
4148 v->HRatioChroma,
4149 v->NoOfDPPThisState,
4150 v->swath_width_luma_ub_this_state,
4151 v->swath_width_chroma_ub_this_state,
4152 v->SwathWidthYThisState,
4153 v->SwathWidthCThisState,
4154 v->SwathHeightYThisState,
4155 v->SwathHeightCThisState,
4156 v->DETBufferSizeYThisState,
4157 v->DETBufferSizeCThisState,
4158 v->SingleDPPViewportSizeSupportPerPlane,
4159 &v->ViewportSizeSupport[0][0]);
4160
4161 for (i = 0; i < v->soc.num_states; i++) {
4162 for (j = 0; j < 2; j++) {
4163 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4164 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4165 v->RequiredDISPCLK[i][j] = 0.0;
4166 v->DISPCLK_DPPCLK_Support[i][j] = true;
4167 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4168 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4169 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4170 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4171 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4172 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4173 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4174 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4175 }
4176 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4177 * (1 + v->DISPCLKRampingMargin / 100.0);
4178 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4179 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4180 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4181 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4182 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4183 }
4184 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4185 * (1 + v->DISPCLKRampingMargin / 100.0);
4186 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4187 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4188 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4189 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4190 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4191 }
4192
4193 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4194 || !(v->Output[k] == dm_dp ||
4195 v->Output[k] == dm_dp2p0 ||
4196 v->Output[k] == dm_edp)) {
4197 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4198 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4199
4200 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4201 FMTBufferExceeded = true;
4202 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4203 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4204 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4205 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4206 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4207 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4208 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4209 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4210 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4211 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4212 } else {
4213 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4214 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4215 }
4216 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
4217 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4218 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
4219 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4220 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4221 } else {
4222 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4223 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4224 }
4225 }
4226 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
4227 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4228 if (v->Output[k] == dm_hdmi) {
4229 FMTBufferExceeded = true;
4230 } else if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
4231 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4232 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4233
4234 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4235 FMTBufferExceeded = true;
4236 } else {
4237 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4238 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4239 }
4240 }
4241 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4242 v->MPCCombine[i][j][k] = false;
4243 v->NoOfDPP[i][j][k] = 4;
4244 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4245 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4246 v->MPCCombine[i][j][k] = false;
4247 v->NoOfDPP[i][j][k] = 2;
4248 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4249 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4250 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4251 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4252 v->MPCCombine[i][j][k] = false;
4253 v->NoOfDPP[i][j][k] = 1;
4254 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4255 } else {
4256 v->MPCCombine[i][j][k] = true;
4257 v->NoOfDPP[i][j][k] = 2;
4258 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4259 }
4260 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4261 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4262 > v->MaxDppclkRoundedDownToDFSGranularity)
4263 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4264 v->DISPCLK_DPPCLK_Support[i][j] = false;
4265 }
4266 }
4267 v->TotalNumberOfActiveDPP[i][j] = 0;
4268 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4269 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4270 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4271 if (v->NoOfDPP[i][j][k] == 1)
4272 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4273 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4274 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4275 NoChroma = false;
4276 }
4277
4278 // UPTO
4279 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4280 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4281 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4282 double BWOfNonSplitPlaneOfMaximumBandwidth;
4283 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4284
4285 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4286 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4287 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4288 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4289 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4290 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4291 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4292 }
4293 }
4294 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4295 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4296 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4297 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4298 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4299 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4300 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4301 }
4302 }
4303 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4304 v->RequiredDISPCLK[i][j] = 0.0;
4305 v->DISPCLK_DPPCLK_Support[i][j] = true;
4306 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4307 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4308 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4309 v->MPCCombine[i][j][k] = true;
4310 v->NoOfDPP[i][j][k] = 2;
4311 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4312 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4313 } else {
4314 v->MPCCombine[i][j][k] = false;
4315 v->NoOfDPP[i][j][k] = 1;
4316 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4317 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4318 }
4319 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4320 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4321 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4322 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4323 } else {
4324 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4325 }
4326 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4327 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4328 > v->MaxDppclkRoundedDownToDFSGranularity)
4329 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4330 v->DISPCLK_DPPCLK_Support[i][j] = false;
4331 }
4332 }
4333 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4334 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4335 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4336 }
4337 }
4338 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4339 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4340 v->DISPCLK_DPPCLK_Support[i][j] = false;
4341 }
4342 }
4343 }
4344
4345 /*Total Available Pipes Support Check*/
4346
4347 for (i = 0; i < v->soc.num_states; i++) {
4348 for (j = 0; j < 2; j++) {
4349 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4350 v->TotalAvailablePipesSupport[i][j] = true;
4351 } else {
4352 v->TotalAvailablePipesSupport[i][j] = false;
4353 }
4354 }
4355 }
4356 /*Display IO and DSC Support Check*/
4357
4358 v->NonsupportedDSCInputBPC = false;
4359 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4360 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4361 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4362 v->NonsupportedDSCInputBPC = true;
4363 }
4364 }
4365
4366 /*Number Of DSC Slices*/
4367 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4368 if (v->BlendingAndTiming[k] == k) {
4369 if (v->PixelClockBackEnd[k] > 3200) {
4370 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4371 } else if (v->PixelClockBackEnd[k] > 1360) {
4372 v->NumberOfDSCSlices[k] = 8;
4373 } else if (v->PixelClockBackEnd[k] > 680) {
4374 v->NumberOfDSCSlices[k] = 4;
4375 } else if (v->PixelClockBackEnd[k] > 340) {
4376 v->NumberOfDSCSlices[k] = 2;
4377 } else {
4378 v->NumberOfDSCSlices[k] = 1;
4379 }
4380 } else {
4381 v->NumberOfDSCSlices[k] = 0;
4382 }
4383 }
4384
4385 for (i = 0; i < v->soc.num_states; i++) {
4386 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4387 v->RequiresDSC[i][k] = false;
4388 v->RequiresFEC[i][k] = false;
4389 if (v->BlendingAndTiming[k] == k) {
4390 if (v->Output[k] == dm_hdmi) {
4391 v->RequiresDSC[i][k] = false;
4392 v->RequiresFEC[i][k] = false;
4393 v->OutputBppPerState[i][k] = TruncToValidBPP(
4394 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4395 3,
4396 v->HTotal[k],
4397 v->HActive[k],
4398 v->PixelClockBackEnd[k],
4399 v->ForcedOutputLinkBPP[k],
4400 false,
4401 v->Output[k],
4402 v->OutputFormat[k],
4403 v->DSCInputBitPerComponent[k],
4404 v->NumberOfDSCSlices[k],
4405 v->AudioSampleRate[k],
4406 v->AudioSampleLayout[k],
4407 v->ODMCombineEnablePerState[i][k]);
4408 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4409 if (v->DSCEnable[k] == true) {
4410 v->RequiresDSC[i][k] = true;
4411 v->LinkDSCEnable = true;
4412 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4413 v->RequiresFEC[i][k] = true;
4414 } else {
4415 v->RequiresFEC[i][k] = false;
4416 }
4417 } else {
4418 v->RequiresDSC[i][k] = false;
4419 v->LinkDSCEnable = false;
4420 if (v->Output[k] == dm_dp2p0) {
4421 v->RequiresFEC[i][k] = true;
4422 } else {
4423 v->RequiresFEC[i][k] = false;
4424 }
4425 }
4426 if (v->Output[k] == dm_dp2p0) {
4427 v->Outbpp = BPP_INVALID;
4428 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4429 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4430 v->Outbpp = TruncToValidBPP(
4431 (1.0 - v->Downspreading / 100.0) * 10000,
4432 v->OutputLinkDPLanes[k],
4433 v->HTotal[k],
4434 v->HActive[k],
4435 v->PixelClockBackEnd[k],
4436 v->ForcedOutputLinkBPP[k],
4437 v->LinkDSCEnable,
4438 v->Output[k],
4439 v->OutputFormat[k],
4440 v->DSCInputBitPerComponent[k],
4441 v->NumberOfDSCSlices[k],
4442 v->AudioSampleRate[k],
4443 v->AudioSampleLayout[k],
4444 v->ODMCombineEnablePerState[i][k]);
4445 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4446 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4447 v->RequiresDSC[i][k] = true;
4448 v->LinkDSCEnable = true;
4449 v->Outbpp = TruncToValidBPP(
4450 (1.0 - v->Downspreading / 100.0) * 10000,
4451 v->OutputLinkDPLanes[k],
4452 v->HTotal[k],
4453 v->HActive[k],
4454 v->PixelClockBackEnd[k],
4455 v->ForcedOutputLinkBPP[k],
4456 v->LinkDSCEnable,
4457 v->Output[k],
4458 v->OutputFormat[k],
4459 v->DSCInputBitPerComponent[k],
4460 v->NumberOfDSCSlices[k],
4461 v->AudioSampleRate[k],
4462 v->AudioSampleLayout[k],
4463 v->ODMCombineEnablePerState[i][k]);
4464 }
4465 v->OutputBppPerState[i][k] = v->Outbpp;
4466 // TODO: Need some other way to handle this nonsense
4467 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4468 }
4469 if (v->Outbpp == BPP_INVALID &&
4470 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4471 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4472 v->Outbpp = TruncToValidBPP(
4473 (1.0 - v->Downspreading / 100.0) * 13500,
4474 v->OutputLinkDPLanes[k],
4475 v->HTotal[k],
4476 v->HActive[k],
4477 v->PixelClockBackEnd[k],
4478 v->ForcedOutputLinkBPP[k],
4479 v->LinkDSCEnable,
4480 v->Output[k],
4481 v->OutputFormat[k],
4482 v->DSCInputBitPerComponent[k],
4483 v->NumberOfDSCSlices[k],
4484 v->AudioSampleRate[k],
4485 v->AudioSampleLayout[k],
4486 v->ODMCombineEnablePerState[i][k]);
4487 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4488 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4489 v->RequiresDSC[i][k] = true;
4490 v->LinkDSCEnable = true;
4491 v->Outbpp = TruncToValidBPP(
4492 (1.0 - v->Downspreading / 100.0) * 13500,
4493 v->OutputLinkDPLanes[k],
4494 v->HTotal[k],
4495 v->HActive[k],
4496 v->PixelClockBackEnd[k],
4497 v->ForcedOutputLinkBPP[k],
4498 v->LinkDSCEnable,
4499 v->Output[k],
4500 v->OutputFormat[k],
4501 v->DSCInputBitPerComponent[k],
4502 v->NumberOfDSCSlices[k],
4503 v->AudioSampleRate[k],
4504 v->AudioSampleLayout[k],
4505 v->ODMCombineEnablePerState[i][k]);
4506 }
4507 v->OutputBppPerState[i][k] = v->Outbpp;
4508 // TODO: Need some other way to handle this nonsense
4509 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4510 }
4511 if (v->Outbpp == BPP_INVALID &&
4512 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4513 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4514 v->Outbpp = TruncToValidBPP(
4515 (1.0 - v->Downspreading / 100.0) * 20000,
4516 v->OutputLinkDPLanes[k],
4517 v->HTotal[k],
4518 v->HActive[k],
4519 v->PixelClockBackEnd[k],
4520 v->ForcedOutputLinkBPP[k],
4521 v->LinkDSCEnable,
4522 v->Output[k],
4523 v->OutputFormat[k],
4524 v->DSCInputBitPerComponent[k],
4525 v->NumberOfDSCSlices[k],
4526 v->AudioSampleRate[k],
4527 v->AudioSampleLayout[k],
4528 v->ODMCombineEnablePerState[i][k]);
4529 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4530 v->ForcedOutputLinkBPP[k] == 0) {
4531 v->RequiresDSC[i][k] = true;
4532 v->LinkDSCEnable = true;
4533 v->Outbpp = TruncToValidBPP(
4534 (1.0 - v->Downspreading / 100.0) * 20000,
4535 v->OutputLinkDPLanes[k],
4536 v->HTotal[k],
4537 v->HActive[k],
4538 v->PixelClockBackEnd[k],
4539 v->ForcedOutputLinkBPP[k],
4540 v->LinkDSCEnable,
4541 v->Output[k],
4542 v->OutputFormat[k],
4543 v->DSCInputBitPerComponent[k],
4544 v->NumberOfDSCSlices[k],
4545 v->AudioSampleRate[k],
4546 v->AudioSampleLayout[k],
4547 v->ODMCombineEnablePerState[i][k]);
4548 }
4549 v->OutputBppPerState[i][k] = v->Outbpp;
4550 // TODO: Need some other way to handle this nonsense
4551 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4552 }
4553 } else {
4554 v->Outbpp = BPP_INVALID;
4555 if (v->PHYCLKPerState[i] >= 270.0) {
4556 v->Outbpp = TruncToValidBPP(
4557 (1.0 - v->Downspreading / 100.0) * 2700,
4558 v->OutputLinkDPLanes[k],
4559 v->HTotal[k],
4560 v->HActive[k],
4561 v->PixelClockBackEnd[k],
4562 v->ForcedOutputLinkBPP[k],
4563 v->LinkDSCEnable,
4564 v->Output[k],
4565 v->OutputFormat[k],
4566 v->DSCInputBitPerComponent[k],
4567 v->NumberOfDSCSlices[k],
4568 v->AudioSampleRate[k],
4569 v->AudioSampleLayout[k],
4570 v->ODMCombineEnablePerState[i][k]);
4571 v->OutputBppPerState[i][k] = v->Outbpp;
4572 // TODO: Need some other way to handle this nonsense
4573 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4574 }
4575 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4576 v->Outbpp = TruncToValidBPP(
4577 (1.0 - v->Downspreading / 100.0) * 5400,
4578 v->OutputLinkDPLanes[k],
4579 v->HTotal[k],
4580 v->HActive[k],
4581 v->PixelClockBackEnd[k],
4582 v->ForcedOutputLinkBPP[k],
4583 v->LinkDSCEnable,
4584 v->Output[k],
4585 v->OutputFormat[k],
4586 v->DSCInputBitPerComponent[k],
4587 v->NumberOfDSCSlices[k],
4588 v->AudioSampleRate[k],
4589 v->AudioSampleLayout[k],
4590 v->ODMCombineEnablePerState[i][k]);
4591 v->OutputBppPerState[i][k] = v->Outbpp;
4592 // TODO: Need some other way to handle this nonsense
4593 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4594 }
4595 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4596 v->Outbpp = TruncToValidBPP(
4597 (1.0 - v->Downspreading / 100.0) * 8100,
4598 v->OutputLinkDPLanes[k],
4599 v->HTotal[k],
4600 v->HActive[k],
4601 v->PixelClockBackEnd[k],
4602 v->ForcedOutputLinkBPP[k],
4603 v->LinkDSCEnable,
4604 v->Output[k],
4605 v->OutputFormat[k],
4606 v->DSCInputBitPerComponent[k],
4607 v->NumberOfDSCSlices[k],
4608 v->AudioSampleRate[k],
4609 v->AudioSampleLayout[k],
4610 v->ODMCombineEnablePerState[i][k]);
4611 v->OutputBppPerState[i][k] = v->Outbpp;
4612 // TODO: Need some other way to handle this nonsense
4613 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4614 }
4615 }
4616 }
4617 } else {
4618 v->OutputBppPerState[i][k] = 0;
4619 }
4620 }
4621 }
4622
4623 for (i = 0; i < v->soc.num_states; i++) {
4624 v->LinkCapacitySupport[i] = true;
4625 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4626 if (v->BlendingAndTiming[k] == k
4627 && (v->Output[k] == dm_dp ||
4628 v->Output[k] == dm_edp ||
4629 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4630 v->LinkCapacitySupport[i] = false;
4631 }
4632 }
4633 }
4634
4635 // UPTO 2172
4636 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4637 if (v->BlendingAndTiming[k] == k
4638 && (v->Output[k] == dm_dp ||
4639 v->Output[k] == dm_edp ||
4640 v->Output[k] == dm_hdmi)) {
4641 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4642 P2IWith420 = true;
4643 }
4644 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4645 && !v->DSC422NativeSupport) {
4646 DSC422NativeNotSupported = true;
4647 }
4648 }
4649 }
4650
4651
4652 for (i = 0; i < v->soc.num_states; ++i) {
4653 v->ODMCombine4To1SupportCheckOK[i] = true;
4654 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4655 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4656 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4657 || v->Output[k] == dm_hdmi)) {
4658 v->ODMCombine4To1SupportCheckOK[i] = false;
4659 }
4660 }
4661 }
4662
4663 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4664
4665 for (i = 0; i < v->soc.num_states; i++) {
4666 v->NotEnoughDSCUnits[i] = false;
4667 v->TotalDSCUnitsRequired = 0.0;
4668 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4669 if (v->RequiresDSC[i][k] == true) {
4670 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4671 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4672 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4673 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4674 } else {
4675 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4676 }
4677 }
4678 }
4679 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4680 v->NotEnoughDSCUnits[i] = true;
4681 }
4682 }
4683 /*DSC Delay per state*/
4684
4685 for (i = 0; i < v->soc.num_states; i++) {
4686 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4687 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4688 v->BPP = 0.0;
4689 } else {
4690 v->BPP = v->OutputBppPerState[i][k];
4691 }
4692 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4693 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4694 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4695 v->DSCInputBitPerComponent[k],
4696 v->BPP,
4697 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4698 v->NumberOfDSCSlices[k],
4699 v->OutputFormat[k],
4700 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4701 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4702 v->DSCDelayPerState[i][k] = 2.0
4703 * (dscceComputeDelay(
4704 v->DSCInputBitPerComponent[k],
4705 v->BPP,
4706 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4707 v->NumberOfDSCSlices[k] / 2,
4708 v->OutputFormat[k],
4709 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4710 } else {
4711 v->DSCDelayPerState[i][k] = 4.0
4712 * (dscceComputeDelay(
4713 v->DSCInputBitPerComponent[k],
4714 v->BPP,
4715 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4716 v->NumberOfDSCSlices[k] / 4,
4717 v->OutputFormat[k],
4718 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4719 }
4720 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4721 } else {
4722 v->DSCDelayPerState[i][k] = 0.0;
4723 }
4724 }
4725 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4726 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4727 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4728 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4729 }
4730 }
4731 }
4732 }
4733
4734 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4735 //
4736 for (i = 0; i < v->soc.num_states; ++i) {
4737 for (j = 0; j <= 1; ++j) {
4738 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4739 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4740 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4741 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4742 }
4743
4744 CalculateSwathAndDETConfiguration(
4745 false,
4746 v->NumberOfActivePlanes,
4747 v->DETBufferSizeInKByte[0],
4748 v->MaximumSwathWidthLuma,
4749 v->MaximumSwathWidthChroma,
4750 v->SourceScan,
4751 v->SourcePixelFormat,
4752 v->SurfaceTiling,
4753 v->ViewportWidth,
4754 v->ViewportHeight,
4755 v->SurfaceWidthY,
4756 v->SurfaceWidthC,
4757 v->SurfaceHeightY,
4758 v->SurfaceHeightC,
4759 v->Read256BlockHeightY,
4760 v->Read256BlockHeightC,
4761 v->Read256BlockWidthY,
4762 v->Read256BlockWidthC,
4763 v->ODMCombineEnableThisState,
4764 v->BlendingAndTiming,
4765 v->BytePerPixelY,
4766 v->BytePerPixelC,
4767 v->BytePerPixelInDETY,
4768 v->BytePerPixelInDETC,
4769 v->HActive,
4770 v->HRatio,
4771 v->HRatioChroma,
4772 v->NoOfDPPThisState,
4773 v->swath_width_luma_ub_this_state,
4774 v->swath_width_chroma_ub_this_state,
4775 v->SwathWidthYThisState,
4776 v->SwathWidthCThisState,
4777 v->SwathHeightYThisState,
4778 v->SwathHeightCThisState,
4779 v->DETBufferSizeYThisState,
4780 v->DETBufferSizeCThisState,
4781 v->dummystring,
4782 &v->ViewportSizeSupport[i][j]);
4783
4784 CalculateDCFCLKDeepSleep(
4785 mode_lib,
4786 v->NumberOfActivePlanes,
4787 v->BytePerPixelY,
4788 v->BytePerPixelC,
4789 v->VRatio,
4790 v->VRatioChroma,
4791 v->SwathWidthYThisState,
4792 v->SwathWidthCThisState,
4793 v->NoOfDPPThisState,
4794 v->HRatio,
4795 v->HRatioChroma,
4796 v->PixelClock,
4797 v->PSCL_FACTOR,
4798 v->PSCL_FACTOR_CHROMA,
4799 v->RequiredDPPCLKThisState,
4800 v->ReadBandwidthLuma,
4801 v->ReadBandwidthChroma,
4802 v->ReturnBusWidth,
4803 &v->ProjectedDCFCLKDeepSleep[i][j]);
4804
4805 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4806 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4807 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4808 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4809 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4810 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4811 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4812 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4813 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4814 }
4815 }
4816 }
4817
4818 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4819 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4820 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4821 }
4822
4823 for (i = 0; i < v->soc.num_states; i++) {
4824 for (j = 0; j < 2; j++) {
4825 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4826
4827 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4828 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4829 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4830 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4831 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4832 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4833 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4834 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4835 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4836 }
4837
4838 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4839 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4840 if (v->DCCEnable[k] == true) {
4841 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4842 }
4843 }
4844
4845 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4846 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4847 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4848
4849 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4850 && v->SourceScan[k] != dm_vert) {
4851 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4852 / 2;
4853 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4854 } else {
4855 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4856 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4857 }
4858
4859 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4860 mode_lib,
4861 v->DCCEnable[k],
4862 v->Read256BlockHeightC[k],
4863 v->Read256BlockWidthC[k],
4864 v->SourcePixelFormat[k],
4865 v->SurfaceTiling[k],
4866 v->BytePerPixelC[k],
4867 v->SourceScan[k],
4868 v->SwathWidthCThisState[k],
4869 v->ViewportHeightChroma[k],
4870 v->GPUVMEnable,
4871 v->HostVMEnable,
4872 v->HostVMMaxNonCachedPageTableLevels,
4873 v->GPUVMMinPageSize,
4874 v->HostVMMinPageSize,
4875 v->PTEBufferSizeInRequestsForChroma,
4876 v->PitchC[k],
4877 0.0,
4878 &v->MacroTileWidthC[k],
4879 &v->MetaRowBytesC,
4880 &v->DPTEBytesPerRowC,
4881 &v->PTEBufferSizeNotExceededC[i][j][k],
4882 &v->dummyinteger7,
4883 &v->dpte_row_height_chroma[k],
4884 &v->dummyinteger28,
4885 &v->dummyinteger26,
4886 &v->dummyinteger23,
4887 &v->meta_row_height_chroma[k],
4888 &v->dummyinteger8,
4889 &v->dummyinteger9,
4890 &v->dummyinteger19,
4891 &v->dummyinteger20,
4892 &v->dummyinteger17,
4893 &v->dummyinteger10,
4894 &v->dummyinteger11);
4895
4896 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4897 mode_lib,
4898 v->VRatioChroma[k],
4899 v->VTAPsChroma[k],
4900 v->Interlace[k],
4901 v->ProgressiveToInterlaceUnitInOPP,
4902 v->SwathHeightCThisState[k],
4903 v->ViewportYStartC[k],
4904 &v->PrefillC[k],
4905 &v->MaxNumSwC[k]);
4906 } else {
4907 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4908 v->PTEBufferSizeInRequestsForChroma = 0;
4909 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4910 v->MetaRowBytesC = 0.0;
4911 v->DPTEBytesPerRowC = 0.0;
4912 v->PrefetchLinesC[i][j][k] = 0.0;
4913 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4914 }
4915 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4916 mode_lib,
4917 v->DCCEnable[k],
4918 v->Read256BlockHeightY[k],
4919 v->Read256BlockWidthY[k],
4920 v->SourcePixelFormat[k],
4921 v->SurfaceTiling[k],
4922 v->BytePerPixelY[k],
4923 v->SourceScan[k],
4924 v->SwathWidthYThisState[k],
4925 v->ViewportHeight[k],
4926 v->GPUVMEnable,
4927 v->HostVMEnable,
4928 v->HostVMMaxNonCachedPageTableLevels,
4929 v->GPUVMMinPageSize,
4930 v->HostVMMinPageSize,
4931 v->PTEBufferSizeInRequestsForLuma,
4932 v->PitchY[k],
4933 v->DCCMetaPitchY[k],
4934 &v->MacroTileWidthY[k],
4935 &v->MetaRowBytesY,
4936 &v->DPTEBytesPerRowY,
4937 &v->PTEBufferSizeNotExceededY[i][j][k],
4938 &v->dummyinteger7,
4939 &v->dpte_row_height[k],
4940 &v->dummyinteger29,
4941 &v->dummyinteger27,
4942 &v->dummyinteger24,
4943 &v->meta_row_height[k],
4944 &v->dummyinteger25,
4945 &v->dpte_group_bytes[k],
4946 &v->dummyinteger21,
4947 &v->dummyinteger22,
4948 &v->dummyinteger18,
4949 &v->dummyinteger5,
4950 &v->dummyinteger6);
4951 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4952 mode_lib,
4953 v->VRatio[k],
4954 v->vtaps[k],
4955 v->Interlace[k],
4956 v->ProgressiveToInterlaceUnitInOPP,
4957 v->SwathHeightYThisState[k],
4958 v->ViewportYStartY[k],
4959 &v->PrefillY[k],
4960 &v->MaxNumSwY[k]);
4961 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4962 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4963 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4964
4965 CalculateRowBandwidth(
4966 v->GPUVMEnable,
4967 v->SourcePixelFormat[k],
4968 v->VRatio[k],
4969 v->VRatioChroma[k],
4970 v->DCCEnable[k],
4971 v->HTotal[k] / v->PixelClock[k],
4972 v->MetaRowBytesY,
4973 v->MetaRowBytesC,
4974 v->meta_row_height[k],
4975 v->meta_row_height_chroma[k],
4976 v->DPTEBytesPerRowY,
4977 v->DPTEBytesPerRowC,
4978 v->dpte_row_height[k],
4979 v->dpte_row_height_chroma[k],
4980 &v->meta_row_bandwidth[i][j][k],
4981 &v->dpte_row_bandwidth[i][j][k]);
4982 }
4983 /*
4984 * DCCMetaBufferSizeSupport(i, j) = True
4985 * For k = 0 To NumberOfActivePlanes - 1
4986 * If MetaRowBytes(i, j, k) > 24064 Then
4987 * DCCMetaBufferSizeSupport(i, j) = False
4988 * End If
4989 * Next k
4990 */
4991 v->DCCMetaBufferSizeSupport[i][j] = true;
4992 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4993 if (v->MetaRowBytes[i][j][k] > 24064)
4994 v->DCCMetaBufferSizeSupport[i][j] = false;
4995 }
4996 v->UrgLatency[i] = CalculateUrgentLatency(
4997 v->UrgentLatencyPixelDataOnly,
4998 v->UrgentLatencyPixelMixedWithVMData,
4999 v->UrgentLatencyVMDataOnly,
5000 v->DoUrgentLatencyAdjustment,
5001 v->UrgentLatencyAdjustmentFabricClockComponent,
5002 v->UrgentLatencyAdjustmentFabricClockReference,
5003 v->FabricClockPerState[i]);
5004
5005 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5006 CalculateUrgentBurstFactor(
5007 v->swath_width_luma_ub_this_state[k],
5008 v->swath_width_chroma_ub_this_state[k],
5009 v->SwathHeightYThisState[k],
5010 v->SwathHeightCThisState[k],
5011 v->HTotal[k] / v->PixelClock[k],
5012 v->UrgLatency[i],
5013 v->CursorBufferSize,
5014 v->CursorWidth[k][0],
5015 v->CursorBPP[k][0],
5016 v->VRatio[k],
5017 v->VRatioChroma[k],
5018 v->BytePerPixelInDETY[k],
5019 v->BytePerPixelInDETC[k],
5020 v->DETBufferSizeYThisState[k],
5021 v->DETBufferSizeCThisState[k],
5022 &v->UrgentBurstFactorCursor[k],
5023 &v->UrgentBurstFactorLuma[k],
5024 &v->UrgentBurstFactorChroma[k],
5025 &NotUrgentLatencyHiding[k]);
5026 }
5027
5028 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5029 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5030 if (NotUrgentLatencyHiding[k]) {
5031 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5032 }
5033 }
5034
5035 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5036 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5037 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5038 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5039 }
5040
5041 v->TotalVActivePixelBandwidth[i][j] = 0;
5042 v->TotalVActiveCursorBandwidth[i][j] = 0;
5043 v->TotalMetaRowBandwidth[i][j] = 0;
5044 v->TotalDPTERowBandwidth[i][j] = 0;
5045 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5046 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5047 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5048 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5049 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5050 }
5051 }
5052 }
5053
5054 //Calculate Return BW
5055 for (i = 0; i < v->soc.num_states; ++i) {
5056 for (j = 0; j <= 1; ++j) {
5057 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5058 if (v->BlendingAndTiming[k] == k) {
5059 if (v->WritebackEnable[k] == true) {
5060 v->WritebackDelayTime[k] = v->WritebackLatency
5061 + CalculateWriteBackDelay(
5062 v->WritebackPixelFormat[k],
5063 v->WritebackHRatio[k],
5064 v->WritebackVRatio[k],
5065 v->WritebackVTaps[k],
5066 v->WritebackDestinationWidth[k],
5067 v->WritebackDestinationHeight[k],
5068 v->WritebackSourceHeight[k],
5069 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5070 } else {
5071 v->WritebackDelayTime[k] = 0.0;
5072 }
5073 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5074 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5075 v->WritebackDelayTime[k] = dml_max(
5076 v->WritebackDelayTime[k],
5077 v->WritebackLatency
5078 + CalculateWriteBackDelay(
5079 v->WritebackPixelFormat[m],
5080 v->WritebackHRatio[m],
5081 v->WritebackVRatio[m],
5082 v->WritebackVTaps[m],
5083 v->WritebackDestinationWidth[m],
5084 v->WritebackDestinationHeight[m],
5085 v->WritebackSourceHeight[m],
5086 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5087 }
5088 }
5089 }
5090 }
5091 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5092 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5093 if (v->BlendingAndTiming[k] == m) {
5094 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5095 }
5096 }
5097 }
5098 v->MaxMaxVStartup[i][j] = 0;
5099 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5100 v->MaximumVStartup[i][j][k] =
5101 CalculateMaxVStartup(
5102 v->VTotal[k],
5103 v->VActive[k],
5104 v->VBlankNom[k],
5105 v->HTotal[k],
5106 v->PixelClock[k],
5107 v->ProgressiveToInterlaceUnitInOPP,
5108 v->Interlace[k],
5109 v->ip.VBlankNomDefaultUS,
5110 v->WritebackDelayTime[k]);
5111 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5112 }
5113 }
5114 }
5115
5116 ReorderingBytes = v->NumberOfChannels
5117 * dml_max3(
5118 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5119 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5120 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5121
5122 for (i = 0; i < v->soc.num_states; ++i) {
5123 for (j = 0; j <= 1; ++j) {
5124 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5125 }
5126 }
5127
5128 if (v->UseMinimumRequiredDCFCLK == true)
5129 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5130
5131 for (i = 0; i < v->soc.num_states; ++i) {
5132 for (j = 0; j <= 1; ++j) {
5133 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5134 v->ReturnBusWidth * v->DCFCLKState[i][j],
5135 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5136 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5137 double PixelDataOnlyReturnBWPerState = dml_min(
5138 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5139 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5140 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5141 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5142 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5143
5144 if (v->HostVMEnable != true) {
5145 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5146 } else {
5147 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5148 }
5149 }
5150 }
5151
5152 //Re-ordering Buffer Support Check
5153 for (i = 0; i < v->soc.num_states; ++i) {
5154 for (j = 0; j <= 1; ++j) {
5155 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5156 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5157 v->ROBSupport[i][j] = true;
5158 } else {
5159 v->ROBSupport[i][j] = false;
5160 }
5161 }
5162 }
5163
5164 //Vertical Active BW support check
5165
5166 MaxTotalVActiveRDBandwidth = 0;
5167 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5168 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5169 }
5170
5171 for (i = 0; i < v->soc.num_states; ++i) {
5172 for (j = 0; j <= 1; ++j) {
5173 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5174 dml_min(
5175 v->ReturnBusWidth * v->DCFCLKState[i][j],
5176 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5177 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5178 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5179 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5180
5181 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5182 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5183 } else {
5184 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5185 }
5186 }
5187 }
5188
5189 v->UrgentLatency = CalculateUrgentLatency(
5190 v->UrgentLatencyPixelDataOnly,
5191 v->UrgentLatencyPixelMixedWithVMData,
5192 v->UrgentLatencyVMDataOnly,
5193 v->DoUrgentLatencyAdjustment,
5194 v->UrgentLatencyAdjustmentFabricClockComponent,
5195 v->UrgentLatencyAdjustmentFabricClockReference,
5196 v->FabricClock);
5197 //Prefetch Check
5198 for (i = 0; i < v->soc.num_states; ++i) {
5199 for (j = 0; j <= 1; ++j) {
5200 double VMDataOnlyReturnBWPerState;
5201 double HostVMInefficiencyFactor = 1;
5202 int NextPrefetchModeState = MinPrefetchMode;
5203 bool UnboundedRequestEnabledThisState = false;
5204 int CompressedBufferSizeInkByteThisState = 0;
5205 double dummy;
5206
5207 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5208
5209 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5210 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5211 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5212 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5213 }
5214
5215 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5216 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5217 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5218 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5219 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5220 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5221 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5222 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5223 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5224 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5225 }
5226
5227 VMDataOnlyReturnBWPerState = dml_min(
5228 dml_min(
5229 v->ReturnBusWidth * v->DCFCLKState[i][j],
5230 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5231 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5232 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5233 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5234 if (v->GPUVMEnable && v->HostVMEnable)
5235 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5236
5237 v->ExtraLatency = CalculateExtraLatency(
5238 v->RoundTripPingLatencyCycles,
5239 ReorderingBytes,
5240 v->DCFCLKState[i][j],
5241 v->TotalNumberOfActiveDPP[i][j],
5242 v->PixelChunkSizeInKByte,
5243 v->TotalNumberOfDCCActiveDPP[i][j],
5244 v->MetaChunkSize,
5245 v->ReturnBWPerState[i][j],
5246 v->GPUVMEnable,
5247 v->HostVMEnable,
5248 v->NumberOfActivePlanes,
5249 v->NoOfDPPThisState,
5250 v->dpte_group_bytes,
5251 HostVMInefficiencyFactor,
5252 v->HostVMMinPageSize,
5253 v->HostVMMaxNonCachedPageTableLevels);
5254
5255 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5256 do {
5257 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5258 v->MaxVStartup = v->NextMaxVStartup;
5259
5260 v->TWait = CalculateTWait(
5261 v->PrefetchModePerState[i][j],
5262 v->DRAMClockChangeLatency,
5263 v->UrgLatency[i],
5264 v->SREnterPlusExitTime);
5265
5266 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5267 CalculatePrefetchSchedulePerPlane(mode_lib,
5268 HostVMInefficiencyFactor,
5269 i, j, k);
5270 }
5271
5272 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5273 CalculateUrgentBurstFactor(
5274 v->swath_width_luma_ub_this_state[k],
5275 v->swath_width_chroma_ub_this_state[k],
5276 v->SwathHeightYThisState[k],
5277 v->SwathHeightCThisState[k],
5278 v->HTotal[k] / v->PixelClock[k],
5279 v->UrgentLatency,
5280 v->CursorBufferSize,
5281 v->CursorWidth[k][0],
5282 v->CursorBPP[k][0],
5283 v->VRatioPreY[i][j][k],
5284 v->VRatioPreC[i][j][k],
5285 v->BytePerPixelInDETY[k],
5286 v->BytePerPixelInDETC[k],
5287 v->DETBufferSizeYThisState[k],
5288 v->DETBufferSizeCThisState[k],
5289 &v->UrgentBurstFactorCursorPre[k],
5290 &v->UrgentBurstFactorLumaPre[k],
5291 &v->UrgentBurstFactorChromaPre[k],
5292 &v->NotUrgentLatencyHidingPre[k]);
5293 }
5294
5295 v->MaximumReadBandwidthWithPrefetch = 0.0;
5296 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5297 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5298 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5299
5300 v->MaximumReadBandwidthWithPrefetch =
5301 v->MaximumReadBandwidthWithPrefetch
5302 + dml_max3(
5303 v->VActivePixelBandwidth[i][j][k]
5304 + v->VActiveCursorBandwidth[i][j][k]
5305 + v->NoOfDPP[i][j][k]
5306 * (v->meta_row_bandwidth[i][j][k]
5307 + v->dpte_row_bandwidth[i][j][k]),
5308 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5309 v->NoOfDPP[i][j][k]
5310 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5311 * v->UrgentBurstFactorLumaPre[k]
5312 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5313 * v->UrgentBurstFactorChromaPre[k])
5314 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5315 }
5316
5317 v->NotEnoughUrgentLatencyHidingPre = false;
5318 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5319 if (v->NotUrgentLatencyHidingPre[k] == true) {
5320 v->NotEnoughUrgentLatencyHidingPre = true;
5321 }
5322 }
5323
5324 v->PrefetchSupported[i][j] = true;
5325 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5326 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5327 v->PrefetchSupported[i][j] = false;
5328 }
5329 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5330 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5331 || v->NoTimeForPrefetch[i][j][k] == true) {
5332 v->PrefetchSupported[i][j] = false;
5333 }
5334 }
5335
5336 v->DynamicMetadataSupported[i][j] = true;
5337 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5338 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5339 v->DynamicMetadataSupported[i][j] = false;
5340 }
5341 }
5342
5343 v->VRatioInPrefetchSupported[i][j] = true;
5344 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5345 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5346 v->VRatioInPrefetchSupported[i][j] = false;
5347 }
5348 }
5349 v->AnyLinesForVMOrRowTooLarge = false;
5350 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5351 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5352 v->AnyLinesForVMOrRowTooLarge = true;
5353 }
5354 }
5355
5356 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5357
5358 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5359 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5360 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5361 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5362 - dml_max(
5363 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5364 v->NoOfDPP[i][j][k]
5365 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5366 * v->UrgentBurstFactorLumaPre[k]
5367 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5368 * v->UrgentBurstFactorChromaPre[k])
5369 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5370 }
5371 v->TotImmediateFlipBytes = 0.0;
5372 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5373 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5374 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5375 + v->DPTEBytesPerRow[i][j][k];
5376 }
5377
5378 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5379 CalculateFlipSchedule(
5380 mode_lib,
5381 k,
5382 HostVMInefficiencyFactor,
5383 v->ExtraLatency,
5384 v->UrgLatency[i],
5385 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5386 v->MetaRowBytes[i][j][k],
5387 v->DPTEBytesPerRow[i][j][k]);
5388 }
5389 v->total_dcn_read_bw_with_flip = 0.0;
5390 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5391 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5392 + dml_max3(
5393 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5394 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5395 + v->VActiveCursorBandwidth[i][j][k],
5396 v->NoOfDPP[i][j][k]
5397 * (v->final_flip_bw[k]
5398 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5399 * v->UrgentBurstFactorLumaPre[k]
5400 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5401 * v->UrgentBurstFactorChromaPre[k])
5402 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5403 }
5404 v->ImmediateFlipSupportedForState[i][j] = true;
5405 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5406 v->ImmediateFlipSupportedForState[i][j] = false;
5407 }
5408 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5409 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5410 v->ImmediateFlipSupportedForState[i][j] = false;
5411 }
5412 }
5413 } else {
5414 v->ImmediateFlipSupportedForState[i][j] = false;
5415 }
5416
5417 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5418 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5419 NextPrefetchModeState = NextPrefetchModeState + 1;
5420 } else {
5421 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5422 }
5423 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5424 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5425 && ((v->HostVMEnable == false &&
5426 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5427 || v->ImmediateFlipSupportedForState[i][j] == true))
5428 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5429
5430 CalculateUnboundedRequestAndCompressedBufferSize(
5431 v->DETBufferSizeInKByte[0],
5432 v->ConfigReturnBufferSizeInKByte,
5433 v->UseUnboundedRequesting,
5434 v->TotalNumberOfActiveDPP[i][j],
5435 NoChroma,
5436 v->MaxNumDPP,
5437 v->CompressedBufferSegmentSizeInkByte,
5438 v->Output,
5439 &UnboundedRequestEnabledThisState,
5440 &CompressedBufferSizeInkByteThisState);
5441
5442 CalculateWatermarksAndDRAMSpeedChangeSupport(
5443 mode_lib,
5444 v->PrefetchModePerState[i][j],
5445 v->DCFCLKState[i][j],
5446 v->ReturnBWPerState[i][j],
5447 v->UrgLatency[i],
5448 v->ExtraLatency,
5449 v->SOCCLKPerState[i],
5450 v->ProjectedDCFCLKDeepSleep[i][j],
5451 v->DETBufferSizeYThisState,
5452 v->DETBufferSizeCThisState,
5453 v->SwathHeightYThisState,
5454 v->SwathHeightCThisState,
5455 v->SwathWidthYThisState,
5456 v->SwathWidthCThisState,
5457 v->NoOfDPPThisState,
5458 v->BytePerPixelInDETY,
5459 v->BytePerPixelInDETC,
5460 UnboundedRequestEnabledThisState,
5461 CompressedBufferSizeInkByteThisState,
5462 &v->DRAMClockChangeSupport[i][j],
5463 &dummy,
5464 &dummy,
5465 &dummy,
5466 &dummy);
5467 }
5468 }
5469
5470 /*PTE Buffer Size Check*/
5471 for (i = 0; i < v->soc.num_states; i++) {
5472 for (j = 0; j < 2; j++) {
5473 v->PTEBufferSizeNotExceeded[i][j] = true;
5474 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5475 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5476 v->PTEBufferSizeNotExceeded[i][j] = false;
5477 }
5478 }
5479 }
5480 }
5481
5482 /*Cursor Support Check*/
5483 v->CursorSupport = true;
5484 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5485 if (v->CursorWidth[k][0] > 0.0) {
5486 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5487 v->CursorSupport = false;
5488 }
5489 }
5490 }
5491
5492 /*Valid Pitch Check*/
5493 v->PitchSupport = true;
5494 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5495 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5496 if (v->DCCEnable[k] == true) {
5497 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5498 } else {
5499 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5500 }
5501 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5502 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5503 && v->SourcePixelFormat[k] != dm_mono_8) {
5504 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5505 if (v->DCCEnable[k] == true) {
5506 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5507 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5508 64.0 * v->Read256BlockWidthC[k]);
5509 } else {
5510 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5511 }
5512 } else {
5513 v->AlignedCPitch[k] = v->PitchC[k];
5514 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5515 }
5516 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5517 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5518 v->PitchSupport = false;
5519 }
5520 }
5521
5522 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5523 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5524 ViewportExceedsSurface = true;
5525 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5526 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5527 && v->SourcePixelFormat[k] != dm_rgbe) {
5528 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5529 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5530 ViewportExceedsSurface = true;
5531 }
5532 }
5533 }
5534 }
5535
5536 /*Mode Support, Voltage State and SOC Configuration*/
5537 for (i = v->soc.num_states - 1; i >= 0; i--) {
5538 for (j = 0; j < 2; j++) {
5539 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5540 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5541 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5542 && v->DTBCLKRequiredMoreThanSupported[i] == false
5543 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5544 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5545 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5546 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5547 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5548 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5549 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5550 && ((v->HostVMEnable == false
5551 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5552 || v->ImmediateFlipSupportedForState[i][j] == true)
5553 && FMTBufferExceeded == false) {
5554 v->ModeSupport[i][j] = true;
5555 } else {
5556 v->ModeSupport[i][j] = false;
5557 }
5558 }
5559 }
5560
5561 {
5562 unsigned int MaximumMPCCombine = 0;
5563
5564 for (i = v->soc.num_states; i >= 0; i--) {
5565 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5566 v->VoltageLevel = i;
5567 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5568 if (v->ModeSupport[i][0] == true) {
5569 MaximumMPCCombine = 0;
5570 } else {
5571 MaximumMPCCombine = 1;
5572 }
5573 }
5574 }
5575 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5576 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5577 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5578 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5579 }
5580 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5581 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5582 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5583 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5584 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5585 v->maxMpcComb = MaximumMPCCombine;
5586 }
5587 }
5588
5589 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5590 struct display_mode_lib *mode_lib,
5591 unsigned int PrefetchMode,
5592 double DCFCLK,
5593 double ReturnBW,
5594 double UrgentLatency,
5595 double ExtraLatency,
5596 double SOCCLK,
5597 double DCFCLKDeepSleep,
5598 unsigned int DETBufferSizeY[],
5599 unsigned int DETBufferSizeC[],
5600 unsigned int SwathHeightY[],
5601 unsigned int SwathHeightC[],
5602 double SwathWidthY[],
5603 double SwathWidthC[],
5604 unsigned int DPPPerPlane[],
5605 double BytePerPixelDETY[],
5606 double BytePerPixelDETC[],
5607 bool UnboundedRequestEnabled,
5608 unsigned int CompressedBufferSizeInkByte,
5609 enum clock_change_support *DRAMClockChangeSupport,
5610 double *StutterExitWatermark,
5611 double *StutterEnterPlusExitWatermark,
5612 double *Z8StutterExitWatermark,
5613 double *Z8StutterEnterPlusExitWatermark)
5614 {
5615 struct vba_vars_st *v = &mode_lib->vba;
5616 double EffectiveLBLatencyHidingY;
5617 double EffectiveLBLatencyHidingC;
5618 double LinesInDETY[DC__NUM_DPP__MAX];
5619 double LinesInDETC;
5620 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5621 unsigned int LinesInDETCRoundedDownToSwath;
5622 double FullDETBufferingTimeY;
5623 double FullDETBufferingTimeC;
5624 double ActiveDRAMClockChangeLatencyMarginY;
5625 double ActiveDRAMClockChangeLatencyMarginC;
5626 double WritebackDRAMClockChangeLatencyMargin;
5627 double PlaneWithMinActiveDRAMClockChangeMargin;
5628 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5629 double WritebackDRAMClockChangeLatencyHiding;
5630 double TotalPixelBW = 0.0;
5631 int k, j;
5632
5633 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5634
5635 #ifdef __DML_VBA_DEBUG__
5636 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5637 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5638 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5639 #endif
5640
5641 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5642
5643 #ifdef __DML_VBA_DEBUG__
5644 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5645 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5646 #endif
5647
5648 v->TotalActiveWriteback = 0;
5649 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5650 if (v->WritebackEnable[k] == true) {
5651 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5652 }
5653 }
5654
5655 if (v->TotalActiveWriteback <= 1) {
5656 v->WritebackUrgentWatermark = v->WritebackLatency;
5657 } else {
5658 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5659 }
5660
5661 if (v->TotalActiveWriteback <= 1) {
5662 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5663 } else {
5664 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5665 }
5666
5667 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5668 TotalPixelBW = TotalPixelBW
5669 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5670 / (v->HTotal[k] / v->PixelClock[k]);
5671 }
5672
5673 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5674 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5675
5676 v->LBLatencyHidingSourceLinesY = dml_min(
5677 (double) v->MaxLineBufferLines,
5678 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5679
5680 v->LBLatencyHidingSourceLinesC = dml_min(
5681 (double) v->MaxLineBufferLines,
5682 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5683
5684 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5685
5686 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5687
5688 if (UnboundedRequestEnabled) {
5689 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5690 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5691 }
5692
5693 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5694 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5695 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5696 if (BytePerPixelDETC[k] > 0) {
5697 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5698 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5699 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5700 } else {
5701 LinesInDETC = 0;
5702 FullDETBufferingTimeC = 999999;
5703 }
5704
5705 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5706 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5707
5708 if (v->NumberOfActivePlanes > 1) {
5709 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5710 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5711 }
5712
5713 if (BytePerPixelDETC[k] > 0) {
5714 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5715 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5716
5717 if (v->NumberOfActivePlanes > 1) {
5718 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5719 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5720 }
5721 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5722 } else {
5723 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5724 }
5725
5726 if (v->WritebackEnable[k] == true) {
5727 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5728 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5729 if (v->WritebackPixelFormat[k] == dm_444_64) {
5730 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5731 }
5732 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5733 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5734 }
5735 }
5736
5737 v->MinActiveDRAMClockChangeMargin = 999999;
5738 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5739 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5740 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5741 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5742 if (v->BlendingAndTiming[k] == k) {
5743 PlaneWithMinActiveDRAMClockChangeMargin = k;
5744 } else {
5745 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5746 if (v->BlendingAndTiming[k] == j) {
5747 PlaneWithMinActiveDRAMClockChangeMargin = j;
5748 }
5749 }
5750 }
5751 }
5752 }
5753
5754 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5755
5756 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5757 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5758 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5759 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5760 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5761 }
5762 }
5763
5764 v->TotalNumberOfActiveOTG = 0;
5765
5766 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5767 if (v->BlendingAndTiming[k] == k) {
5768 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5769 }
5770 }
5771
5772 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5773 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5774 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5775 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5776 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5777 } else {
5778 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5779 }
5780
5781 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5782 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5783 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5784 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5785
5786 #ifdef __DML_VBA_DEBUG__
5787 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5788 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5789 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5790 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5791 #endif
5792 }
5793
5794 static void CalculateDCFCLKDeepSleep(
5795 struct display_mode_lib *mode_lib,
5796 unsigned int NumberOfActivePlanes,
5797 int BytePerPixelY[],
5798 int BytePerPixelC[],
5799 double VRatio[],
5800 double VRatioChroma[],
5801 double SwathWidthY[],
5802 double SwathWidthC[],
5803 unsigned int DPPPerPlane[],
5804 double HRatio[],
5805 double HRatioChroma[],
5806 double PixelClock[],
5807 double PSCL_THROUGHPUT[],
5808 double PSCL_THROUGHPUT_CHROMA[],
5809 double DPPCLK[],
5810 double ReadBandwidthLuma[],
5811 double ReadBandwidthChroma[],
5812 int ReturnBusWidth,
5813 double *DCFCLKDeepSleep)
5814 {
5815 struct vba_vars_st *v = &mode_lib->vba;
5816 double DisplayPipeLineDeliveryTimeLuma;
5817 double DisplayPipeLineDeliveryTimeChroma;
5818 double ReadBandwidth = 0.0;
5819 int k;
5820
5821 for (k = 0; k < NumberOfActivePlanes; ++k) {
5822
5823 if (VRatio[k] <= 1) {
5824 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5825 } else {
5826 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5827 }
5828 if (BytePerPixelC[k] == 0) {
5829 DisplayPipeLineDeliveryTimeChroma = 0;
5830 } else {
5831 if (VRatioChroma[k] <= 1) {
5832 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5833 } else {
5834 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5835 }
5836 }
5837
5838 if (BytePerPixelC[k] > 0) {
5839 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5840 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5841 } else {
5842 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5843 }
5844 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5845
5846 }
5847
5848 for (k = 0; k < NumberOfActivePlanes; ++k) {
5849 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5850 }
5851
5852 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5853
5854 for (k = 0; k < NumberOfActivePlanes; ++k) {
5855 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5856 }
5857 }
5858
/*
 * CalculateUrgentBurstFactor - urgent burst factors for one plane.
 *
 * For the cursor buffer and for the luma / chroma DET buffers the amount of
 * buffered data is converted into a time T (buffered lines * LineTime / ratio)
 * and the burst factor is T / (T - UrgentLatency).
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding - cleared on entry, set as soon as any
 *       evaluated buffer has T - UrgentLatency <= 0 (its factor is then 0).
 *   *UrgentBurstFactorCursor      - only written when CursorWidth > 0.
 *   *UrgentBurstFactorLuma        - always written.
 *   *UrgentBurstFactorChroma      - only written when BytePerPixelInDETC > 0.
 * A factor is set to 1 when the relevant vertical ratio is not > 0.
 *
 * The chroma factor is derived from VRatioC (the cursor and luma factors use
 * VRatio).  The chroma branch previously tested and divided by the luma ratio,
 * leaving the VRatioC argument unused.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/*
		 * CursorBufferSize * 1024 divided by (CursorWidth * CursorBPP / 8),
		 * rounded down to a power of two.
		 */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	/* Luma: lines held in the DET, rounded down to whole swaths. */
	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	/* Chroma: same computation, but scaled by the chroma vertical ratio. */
	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5932
5933 static void CalculatePixelDeliveryTimes(
5934 unsigned int NumberOfActivePlanes,
5935 double VRatio[],
5936 double VRatioChroma[],
5937 double VRatioPrefetchY[],
5938 double VRatioPrefetchC[],
5939 unsigned int swath_width_luma_ub[],
5940 unsigned int swath_width_chroma_ub[],
5941 unsigned int DPPPerPlane[],
5942 double HRatio[],
5943 double HRatioChroma[],
5944 double PixelClock[],
5945 double PSCL_THROUGHPUT[],
5946 double PSCL_THROUGHPUT_CHROMA[],
5947 double DPPCLK[],
5948 int BytePerPixelC[],
5949 enum scan_direction_class SourceScan[],
5950 unsigned int NumberOfCursors[],
5951 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5952 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5953 unsigned int BlockWidth256BytesY[],
5954 unsigned int BlockHeight256BytesY[],
5955 unsigned int BlockWidth256BytesC[],
5956 unsigned int BlockHeight256BytesC[],
5957 double DisplayPipeLineDeliveryTimeLuma[],
5958 double DisplayPipeLineDeliveryTimeChroma[],
5959 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5960 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5961 double DisplayPipeRequestDeliveryTimeLuma[],
5962 double DisplayPipeRequestDeliveryTimeChroma[],
5963 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5964 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5965 double CursorRequestDeliveryTime[],
5966 double CursorRequestDeliveryTimePrefetch[])
5967 {
5968 double req_per_swath_ub;
5969 int k;
5970
5971 for (k = 0; k < NumberOfActivePlanes; ++k) {
5972 if (VRatio[k] <= 1) {
5973 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5974 } else {
5975 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5976 }
5977
5978 if (BytePerPixelC[k] == 0) {
5979 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5980 } else {
5981 if (VRatioChroma[k] <= 1) {
5982 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5983 } else {
5984 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5985 }
5986 }
5987
5988 if (VRatioPrefetchY[k] <= 1) {
5989 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5990 } else {
5991 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5992 }
5993
5994 if (BytePerPixelC[k] == 0) {
5995 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5996 } else {
5997 if (VRatioPrefetchC[k] <= 1) {
5998 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5999 } else {
6000 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6001 }
6002 }
6003 }
6004
6005 for (k = 0; k < NumberOfActivePlanes; ++k) {
6006 if (SourceScan[k] != dm_vert) {
6007 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6008 } else {
6009 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6010 }
6011 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6012 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6013 if (BytePerPixelC[k] == 0) {
6014 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6015 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6016 } else {
6017 if (SourceScan[k] != dm_vert) {
6018 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6019 } else {
6020 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6021 }
6022 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6023 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6024 }
6025 #ifdef __DML_VBA_DEBUG__
6026 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6027 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6028 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6029 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6030 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6031 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6032 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6033 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6034 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6035 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6036 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6037 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6038 #endif
6039 }
6040
6041 for (k = 0; k < NumberOfActivePlanes; ++k) {
6042 int cursor_req_per_width;
6043
6044 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6045 if (NumberOfCursors[k] > 0) {
6046 if (VRatio[k] <= 1) {
6047 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6048 } else {
6049 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6050 }
6051 if (VRatioPrefetchY[k] <= 1) {
6052 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6053 } else {
6054 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6055 }
6056 } else {
6057 CursorRequestDeliveryTime[k] = 0;
6058 CursorRequestDeliveryTimePrefetch[k] = 0;
6059 }
6060 #ifdef __DML_VBA_DEBUG__
6061 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6062 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6063 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6064 #endif
6065 }
6066 }
6067
6068 static void CalculateMetaAndPTETimes(
6069 int NumberOfActivePlanes,
6070 bool GPUVMEnable,
6071 int MetaChunkSize,
6072 int MinMetaChunkSizeBytes,
6073 int HTotal[],
6074 double VRatio[],
6075 double VRatioChroma[],
6076 double DestinationLinesToRequestRowInVBlank[],
6077 double DestinationLinesToRequestRowInImmediateFlip[],
6078 bool DCCEnable[],
6079 double PixelClock[],
6080 int BytePerPixelY[],
6081 int BytePerPixelC[],
6082 enum scan_direction_class SourceScan[],
6083 int dpte_row_height[],
6084 int dpte_row_height_chroma[],
6085 int meta_row_width[],
6086 int meta_row_width_chroma[],
6087 int meta_row_height[],
6088 int meta_row_height_chroma[],
6089 int meta_req_width[],
6090 int meta_req_width_chroma[],
6091 int meta_req_height[],
6092 int meta_req_height_chroma[],
6093 int dpte_group_bytes[],
6094 int PTERequestSizeY[],
6095 int PTERequestSizeC[],
6096 int PixelPTEReqWidthY[],
6097 int PixelPTEReqHeightY[],
6098 int PixelPTEReqWidthC[],
6099 int PixelPTEReqHeightC[],
6100 int dpte_row_width_luma_ub[],
6101 int dpte_row_width_chroma_ub[],
6102 double DST_Y_PER_PTE_ROW_NOM_L[],
6103 double DST_Y_PER_PTE_ROW_NOM_C[],
6104 double DST_Y_PER_META_ROW_NOM_L[],
6105 double DST_Y_PER_META_ROW_NOM_C[],
6106 double TimePerMetaChunkNominal[],
6107 double TimePerChromaMetaChunkNominal[],
6108 double TimePerMetaChunkVBlank[],
6109 double TimePerChromaMetaChunkVBlank[],
6110 double TimePerMetaChunkFlip[],
6111 double TimePerChromaMetaChunkFlip[],
6112 double time_per_pte_group_nom_luma[],
6113 double time_per_pte_group_vblank_luma[],
6114 double time_per_pte_group_flip_luma[],
6115 double time_per_pte_group_nom_chroma[],
6116 double time_per_pte_group_vblank_chroma[],
6117 double time_per_pte_group_flip_chroma[])
6118 {
6119 unsigned int meta_chunk_width;
6120 unsigned int min_meta_chunk_width;
6121 unsigned int meta_chunk_per_row_int;
6122 unsigned int meta_row_remainder;
6123 unsigned int meta_chunk_threshold;
6124 unsigned int meta_chunks_per_row_ub;
6125 unsigned int meta_chunk_width_chroma;
6126 unsigned int min_meta_chunk_width_chroma;
6127 unsigned int meta_chunk_per_row_int_chroma;
6128 unsigned int meta_row_remainder_chroma;
6129 unsigned int meta_chunk_threshold_chroma;
6130 unsigned int meta_chunks_per_row_ub_chroma;
6131 unsigned int dpte_group_width_luma;
6132 unsigned int dpte_groups_per_row_luma_ub;
6133 unsigned int dpte_group_width_chroma;
6134 unsigned int dpte_groups_per_row_chroma_ub;
6135 int k;
6136
6137 for (k = 0; k < NumberOfActivePlanes; ++k) {
6138 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6139 if (BytePerPixelC[k] == 0) {
6140 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6141 } else {
6142 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6143 }
6144 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6145 if (BytePerPixelC[k] == 0) {
6146 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6147 } else {
6148 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6149 }
6150 }
6151
6152 for (k = 0; k < NumberOfActivePlanes; ++k) {
6153 if (DCCEnable[k] == true) {
6154 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6155 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6156 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6157 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6158 if (SourceScan[k] != dm_vert) {
6159 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6160 } else {
6161 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6162 }
6163 if (meta_row_remainder <= meta_chunk_threshold) {
6164 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6165 } else {
6166 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6167 }
6168 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6169 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6170 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6171 if (BytePerPixelC[k] == 0) {
6172 TimePerChromaMetaChunkNominal[k] = 0;
6173 TimePerChromaMetaChunkVBlank[k] = 0;
6174 TimePerChromaMetaChunkFlip[k] = 0;
6175 } else {
6176 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6177 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6178 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6179 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6180 if (SourceScan[k] != dm_vert) {
6181 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6182 } else {
6183 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6184 }
6185 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6186 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6187 } else {
6188 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6189 }
6190 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6191 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6192 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6193 }
6194 } else {
6195 TimePerMetaChunkNominal[k] = 0;
6196 TimePerMetaChunkVBlank[k] = 0;
6197 TimePerMetaChunkFlip[k] = 0;
6198 TimePerChromaMetaChunkNominal[k] = 0;
6199 TimePerChromaMetaChunkVBlank[k] = 0;
6200 TimePerChromaMetaChunkFlip[k] = 0;
6201 }
6202 }
6203
6204 for (k = 0; k < NumberOfActivePlanes; ++k) {
6205 if (GPUVMEnable == true) {
6206 if (SourceScan[k] != dm_vert) {
6207 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6208 } else {
6209 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6210 }
6211 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6212 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6213 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6214 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6215 if (BytePerPixelC[k] == 0) {
6216 time_per_pte_group_nom_chroma[k] = 0;
6217 time_per_pte_group_vblank_chroma[k] = 0;
6218 time_per_pte_group_flip_chroma[k] = 0;
6219 } else {
6220 if (SourceScan[k] != dm_vert) {
6221 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6222 } else {
6223 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6224 }
6225 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6226 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6227 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6228 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6229 }
6230 } else {
6231 time_per_pte_group_nom_luma[k] = 0;
6232 time_per_pte_group_vblank_luma[k] = 0;
6233 time_per_pte_group_flip_luma[k] = 0;
6234 time_per_pte_group_nom_chroma[k] = 0;
6235 time_per_pte_group_vblank_chroma[k] = 0;
6236 time_per_pte_group_flip_chroma[k] = 0;
6237 }
6238 }
6239 }
6240
/*
 * CalculateVMGroupAndRequestTimes - per-plane VM group and VM request times.
 *
 * For each plane k in [0, NumberOfActivePlanes) the four outputs
 * TimePerVM{Group,Request}{VBlank,Flip}[k] are 0 unless GPUVMEnable is set and
 * either DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1.  Otherwise they
 * are the VBlank / immediate-flip line counts times HTotal / PixelClock,
 * divided by the number of groups (byte counts / vm_group_bytes, rounded up)
 * or requests (byte counts / 64) and halved when GPUVMMaxPageTableLevels > 2.
 *
 * Which byte counts contribute:
 *   - DCC off:                     dpde0 bytes only
 *   - DCC on, one page table level: meta PTE bytes only
 *   - DCC on, several levels:       both, plus a constant 1 (or 2 with chroma)
 *                                   added to the group count
 * Chroma byte counts are included only when BytePerPixelC[k] > 0.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		int groups_per_stage;
		int reqs_per_stage;
		bool has_chroma;

		if (!(GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;

		if (DCCEnable[k] == false) {
			/* Page directory entries only. */
			groups_per_stage = has_chroma ?
					dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) :
					dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			reqs_per_stage = has_chroma ?
					dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 :
					dpde0_bytes_per_frame_ub_l[k] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			groups_per_stage = has_chroma ?
					dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) :
					dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			reqs_per_stage = has_chroma ?
					meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64 :
					meta_pte_bytes_per_frame_ub_l[k] / 64;
		} else {
			/* Page directory entries and meta PTEs together. */
			groups_per_stage = has_chroma ?
					2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) :
					1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			reqs_per_stage = has_chroma ?
					dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
					+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64 :
					dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
		}

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / groups_per_stage;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / groups_per_stage;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / reqs_per_stage;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / reqs_per_stage;

		/* More than two page table levels: halve every time budget. */
		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
6340
6341 static void CalculateStutterEfficiency(
6342 struct display_mode_lib *mode_lib,
6343 int CompressedBufferSizeInkByte,
6344 bool UnboundedRequestEnabled,
6345 int ConfigReturnBufferSizeInKByte,
6346 int MetaFIFOSizeInKEntries,
6347 int ZeroSizeBufferEntries,
6348 int NumberOfActivePlanes,
6349 int ROBBufferSizeInKByte,
6350 double TotalDataReadBandwidth,
6351 double DCFCLK,
6352 double ReturnBW,
6353 double COMPBUF_RESERVED_SPACE_64B,
6354 double COMPBUF_RESERVED_SPACE_ZS,
6355 double SRExitTime,
6356 double SRExitZ8Time,
6357 bool SynchronizedVBlank,
6358 double Z8StutterEnterPlusExitWatermark,
6359 double StutterEnterPlusExitWatermark,
6360 bool ProgressiveToInterlaceUnitInOPP,
6361 bool Interlace[],
6362 double MinTTUVBlank[],
6363 int DPPPerPlane[],
6364 unsigned int DETBufferSizeY[],
6365 int BytePerPixelY[],
6366 double BytePerPixelDETY[],
6367 double SwathWidthY[],
6368 int SwathHeightY[],
6369 int SwathHeightC[],
6370 double NetDCCRateLuma[],
6371 double NetDCCRateChroma[],
6372 double DCCFractionOfZeroSizeRequestsLuma[],
6373 double DCCFractionOfZeroSizeRequestsChroma[],
6374 int HTotal[],
6375 int VTotal[],
6376 double PixelClock[],
6377 double VRatio[],
6378 enum scan_direction_class SourceScan[],
6379 int BlockHeight256BytesY[],
6380 int BlockWidth256BytesY[],
6381 int BlockHeight256BytesC[],
6382 int BlockWidth256BytesC[],
6383 int DCCYMaxUncompressedBlock[],
6384 int DCCCMaxUncompressedBlock[],
6385 int VActive[],
6386 bool DCCEnable[],
6387 bool WritebackEnable[],
6388 double ReadBandwidthPlaneLuma[],
6389 double ReadBandwidthPlaneChroma[],
6390 double meta_row_bw[],
6391 double dpte_row_bw[],
6392 double *StutterEfficiencyNotIncludingVBlank,
6393 double *StutterEfficiency,
6394 int *NumberOfStutterBurstsPerFrame,
6395 double *Z8StutterEfficiencyNotIncludingVBlank,
6396 double *Z8StutterEfficiency,
6397 int *Z8NumberOfStutterBurstsPerFrame,
6398 double *StutterPeriod)
6399 {
6400 struct vba_vars_st *v = &mode_lib->vba;
6401
6402 double DETBufferingTimeY;
6403 double SwathWidthYCriticalPlane = 0;
6404 double VActiveTimeCriticalPlane = 0;
6405 double FrameTimeCriticalPlane = 0;
6406 int BytePerPixelYCriticalPlane = 0;
6407 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6408 double MinTTUVBlankCriticalPlane = 0;
6409 double TotalCompressedReadBandwidth;
6410 double TotalRowReadBandwidth;
6411 double AverageDCCCompressionRate;
6412 double EffectiveCompressedBufferSize;
6413 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6414 double StutterBurstTime;
6415 int TotalActiveWriteback;
6416 double LinesInDETY;
6417 double LinesInDETYRoundedDownToSwath;
6418 double MaximumEffectiveCompressionLuma;
6419 double MaximumEffectiveCompressionChroma;
6420 double TotalZeroSizeRequestReadBandwidth;
6421 double TotalZeroSizeCompressedReadBandwidth;
6422 double AverageDCCZeroSizeFraction;
6423 double AverageZeroSizeCompressionRate;
6424 int TotalNumberOfActiveOTG = 0;
6425 double LastStutterPeriod = 0.0;
6426 double LastZ8StutterPeriod = 0.0;
6427 int k;
6428
6429 TotalZeroSizeRequestReadBandwidth = 0;
6430 TotalZeroSizeCompressedReadBandwidth = 0;
6431 TotalRowReadBandwidth = 0;
6432 TotalCompressedReadBandwidth = 0;
6433
6434 for (k = 0; k < NumberOfActivePlanes; ++k) {
6435 if (DCCEnable[k] == true) {
6436 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6437 || DCCYMaxUncompressedBlock[k] < 256) {
6438 MaximumEffectiveCompressionLuma = 2;
6439 } else {
6440 MaximumEffectiveCompressionLuma = 4;
6441 }
6442 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6443 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6444 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6445 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6446 if (ReadBandwidthPlaneChroma[k] > 0) {
6447 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6448 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6449 MaximumEffectiveCompressionChroma = 2;
6450 } else {
6451 MaximumEffectiveCompressionChroma = 4;
6452 }
6453 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6454 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6455 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6456 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6457 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6458 }
6459 } else {
6460 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6461 }
6462 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6463 }
6464
6465 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6466 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6467
6468 #ifdef __DML_VBA_DEBUG__
6469 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6470 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6471 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6472 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6473 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6474 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6475 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6476 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6477 #endif
6478
6479 if (AverageDCCZeroSizeFraction == 1) {
6480 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6481 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6482 } else if (AverageDCCZeroSizeFraction > 0) {
6483 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6484 EffectiveCompressedBufferSize = dml_min(
6485 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6486 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6487 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6488 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6489 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6490 dml_print(
6491 "DML::%s: min 2 = %f\n",
6492 __func__,
6493 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6494 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6495 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6496 } else {
6497 EffectiveCompressedBufferSize = dml_min(
6498 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6499 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6500 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6501 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6502 }
6503
6504 #ifdef __DML_VBA_DEBUG__
6505 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6506 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6507 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6508 #endif
6509
6510 *StutterPeriod = 0;
6511 for (k = 0; k < NumberOfActivePlanes; ++k) {
6512 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6513 / BytePerPixelDETY[k] / SwathWidthY[k];
6514 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6515 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6516 #ifdef __DML_VBA_DEBUG__
6517 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6518 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6519 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6520 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6521 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6522 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6523 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6524 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6525 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6526 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6527 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6528 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6529 #endif
6530
6531 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6532 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6533
6534 *StutterPeriod = DETBufferingTimeY;
6535 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6536 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6537 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6538 SwathWidthYCriticalPlane = SwathWidthY[k];
6539 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6540 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6541
6542 #ifdef __DML_VBA_DEBUG__
6543 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6544 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6545 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6546 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6547 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6548 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6549 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6550 #endif
6551 }
6552 }
6553
6554 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6555 #ifdef __DML_VBA_DEBUG__
6556 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6557 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6558 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6559 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6560 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6561 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6562 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6563 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6564 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6565 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6566 #endif
6567
6568 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6569 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6570 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6571 #ifdef __DML_VBA_DEBUG__
6572 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6573 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6574 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6575 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6576 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6577 #endif
6578 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6579
6580 dml_print(
6581 "DML::%s: Time to finish residue swath=%f\n",
6582 __func__,
6583 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6584
6585 TotalActiveWriteback = 0;
6586 for (k = 0; k < NumberOfActivePlanes; ++k) {
6587 if (WritebackEnable[k]) {
6588 TotalActiveWriteback = TotalActiveWriteback + 1;
6589 }
6590 }
6591
6592 if (TotalActiveWriteback == 0) {
6593 #ifdef __DML_VBA_DEBUG__
6594 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6595 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6596 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6597 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6598 #endif
6599 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6600 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6601 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6602 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6603 } else {
6604 *StutterEfficiencyNotIncludingVBlank = 0.;
6605 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6606 *NumberOfStutterBurstsPerFrame = 0;
6607 *Z8NumberOfStutterBurstsPerFrame = 0;
6608 }
6609 #ifdef __DML_VBA_DEBUG__
6610 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6611 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6612 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6613 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6614 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6615 #endif
6616
6617 for (k = 0; k < NumberOfActivePlanes; ++k) {
6618 if (v->BlendingAndTiming[k] == k) {
6619 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6620 }
6621 }
6622
6623 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6624 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6625
6626 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6627 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6628 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6629 } else {
6630 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6631 }
6632 } else {
6633 *StutterEfficiency = 0;
6634 }
6635
6636 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6637 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6638 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6639 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6640 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6641 } else {
6642 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6643 }
6644 } else {
6645 *Z8StutterEfficiency = 0.;
6646 }
6647
6648 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6649 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6650 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6651 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6652 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6653 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6654 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6655 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6656 }
6657
/*
 * CalculateSwathAndDETConfiguration() - pick per-plane swath heights and split
 * the DET buffer between luma and chroma.
 *
 * For every plane in [0, NumberOfActivePlanes) this function:
 *   1. calls CalculateSwathWidth() to fill SwathWidth[], SwathWidthChroma[],
 *      swath_width_luma_ub[], swath_width_chroma_ub[] and the local maximum
 *      swath heights;
 *   2. derives a minimum swath height (either the maximum or half of it) from
 *      the pixel format, tiling mode and scan direction;
 *   3. chooses SwathHeightY[]/SwathHeightC[] among the max/min candidates so
 *      that luma + chroma swath bytes fit in DETBufferSizeInKByte * 1024 / 2;
 *   4. writes DETBufferSizeY[]/DETBufferSizeC[];
 *   5. writes ViewportSizeSupportPerPlane[] and clears *ViewportSizeSupport if
 *      any plane fails the check.
 *
 * Outputs: swath_width_luma_ub, swath_width_chroma_ub, SwathWidth,
 * SwathWidthChroma, SwathHeightY, SwathHeightC, DETBufferSizeY,
 * DETBufferSizeC, ViewportSizeSupportPerPlane, *ViewportSizeSupport.
 * HRatioChroma is accepted but not referenced in this body.
 */
6658 static void CalculateSwathAndDETConfiguration(
6659 bool ForceSingleDPP,
6660 int NumberOfActivePlanes,
6661 unsigned int DETBufferSizeInKByte,
6662 double MaximumSwathWidthLuma[],
6663 double MaximumSwathWidthChroma[],
6664 enum scan_direction_class SourceScan[],
6665 enum source_format_class SourcePixelFormat[],
6666 enum dm_swizzle_mode SurfaceTiling[],
6667 int ViewportWidth[],
6668 int ViewportHeight[],
6669 int SurfaceWidthY[],
6670 int SurfaceWidthC[],
6671 int SurfaceHeightY[],
6672 int SurfaceHeightC[],
6673 int Read256BytesBlockHeightY[],
6674 int Read256BytesBlockHeightC[],
6675 int Read256BytesBlockWidthY[],
6676 int Read256BytesBlockWidthC[],
6677 enum odm_combine_mode ODMCombineEnabled[],
6678 int BlendingAndTiming[],
6679 int BytePerPixY[],
6680 int BytePerPixC[],
6681 double BytePerPixDETY[],
6682 double BytePerPixDETC[],
6683 int HActive[],
6684 double HRatio[],
6685 double HRatioChroma[],
6686 int DPPPerPlane[],
6687 int swath_width_luma_ub[],
6688 int swath_width_chroma_ub[],
6689 double SwathWidth[],
6690 double SwathWidthChroma[],
6691 int SwathHeightY[],
6692 int SwathHeightC[],
6693 unsigned int DETBufferSizeY[],
6694 unsigned int DETBufferSizeC[],
6695 bool ViewportSizeSupportPerPlane[],
6696 bool *ViewportSizeSupport)
6697 {
6698 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6699 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6700 int MinimumSwathHeightY;
6701 int MinimumSwathHeightC;
6702 int RoundedUpMaxSwathSizeBytesY;
6703 int RoundedUpMaxSwathSizeBytesC;
6704 int RoundedUpMinSwathSizeBytesY;
6705 int RoundedUpMinSwathSizeBytesC;
6706 int RoundedUpSwathSizeBytesY;
6707 int RoundedUpSwathSizeBytesC;
6708 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6709 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6710 int k;
6711
/*
 * Fills the swath widths, the block-aligned "_ub" widths and
 * MaximumSwathHeightY/C for every active plane. The single-DPP widths are
 * only scratch outputs here; they are not read again in this function.
 */
6712 CalculateSwathWidth(
6713 ForceSingleDPP,
6714 NumberOfActivePlanes,
6715 SourcePixelFormat,
6716 SourceScan,
6717 ViewportWidth,
6718 ViewportHeight,
6719 SurfaceWidthY,
6720 SurfaceWidthC,
6721 SurfaceHeightY,
6722 SurfaceHeightC,
6723 ODMCombineEnabled,
6724 BytePerPixY,
6725 BytePerPixC,
6726 Read256BytesBlockHeightY,
6727 Read256BytesBlockHeightC,
6728 Read256BytesBlockWidthY,
6729 Read256BytesBlockWidthC,
6730 BlendingAndTiming,
6731 HActive,
6732 HRatio,
6733 DPPPerPlane,
6734 SwathWidthSingleDPP,
6735 SwathWidthSingleDPPChroma,
6736 SwathWidth,
6737 SwathWidthChroma,
6738 MaximumSwathHeightY,
6739 MaximumSwathHeightC,
6740 swath_width_luma_ub,
6741 swath_width_chroma_ub);
6742
/* Start optimistic; any failing plane below clears the overall flag. */
6743 *ViewportSizeSupport = true;
6744 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Step 1: minimum swath heights.
 * First branch: formats 444_64/32/16, mono_16/8 and rgbe. Chroma minimum is
 * always the maximum; luma minimum is the maximum or half of it.
 */
6745 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6746 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6747 if (SurfaceTiling[k] == dm_sw_linear
6748 || (SourcePixelFormat[k] == dm_444_64
6749 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6750 && SourceScan[k] != dm_vert)) {
6751 MinimumSwathHeightY = MaximumSwathHeightY[k];
/*
 * NOTE(review): dm_444_8 is not among the formats accepted by the enclosing
 * condition, so this test cannot be true here; dm_444_8 planes take the
 * outer else branch instead. Left untouched (HW-provided code).
 */
6752 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6753 MinimumSwathHeightY = MaximumSwathHeightY[k];
6754 } else {
6755 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6756 }
6757 MinimumSwathHeightC = MaximumSwathHeightC[k];
6758 } else {
/* All remaining formats: linear keeps the maximum, otherwise halve per case. */
6759 if (SurfaceTiling[k] == dm_sw_linear) {
6760 MinimumSwathHeightY = MaximumSwathHeightY[k];
6761 MinimumSwathHeightC = MaximumSwathHeightC[k];
6762 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6763 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6764 MinimumSwathHeightC = MaximumSwathHeightC[k];
6765 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6766 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6767 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6768 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6769 MinimumSwathHeightY = MaximumSwathHeightY[k];
6770 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6771 } else {
6772 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6773 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6774 }
6775 }
6776
/*
 * Step 2: swath sizes in bytes for the max and min height candidates.
 * The products involve the double BytePerPixDET* values and are stored in
 * int variables, so any fractional part is dropped on assignment. For
 * dm_420_10 the sizes are additionally passed through dml_ceil(..., 256).
 */
6777 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6778 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6779 if (SourcePixelFormat[k] == dm_420_10) {
6780 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6781 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6782 }
6783 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6784 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6785 if (SourcePixelFormat[k] == dm_420_10) {
6786 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6787 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6788 }
6789
/*
 * Step 3: choose swath heights. Preference order:
 *   max Y + max C, if the pair fits in DETBufferSizeInKByte * 1024 / 2;
 *   min Y + max C, when luma is >= 1.5x chroma and that pair fits;
 *   max Y + min C, when luma is <  1.5x chroma and that pair fits;
 *   min Y + min C otherwise (no fit check is made on this last branch).
 * The right-hand side of each fit check is unsigned integer arithmetic.
 */
6790 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6791 SwathHeightY[k] = MaximumSwathHeightY[k];
6792 SwathHeightC[k] = MaximumSwathHeightC[k];
6793 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6794 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6795 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6796 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6797 SwathHeightY[k] = MinimumSwathHeightY;
6798 SwathHeightC[k] = MaximumSwathHeightC[k];
6799 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6800 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6801 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6802 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6803 SwathHeightY[k] = MaximumSwathHeightY[k];
6804 SwathHeightC[k] = MinimumSwathHeightC;
6805 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6806 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6807 } else {
6808 SwathHeightY[k] = MinimumSwathHeightY;
6809 SwathHeightC[k] = MinimumSwathHeightC;
6810 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6811 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6812 }
6813 {
/*
 * NOTE(review): dml_ceil() is defined outside this chunk; presumably this
 * rounds DETBufferSizeInKByte up to a multiple of 64 -- confirm.
 */
6814 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6815
/*
 * Step 4: DET split. No chroma swath -> everything to luma; luma swath at
 * most 1.5x the chroma swath -> half each; otherwise 2/3 (passed through
 * dml_floor(..., 1024)) to luma and 1/3 to chroma.
 */
6816 if (SwathHeightC[k] == 0) {
6817 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6818 DETBufferSizeC[k] = 0;
6819 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6820 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6821 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6822 } else {
6823 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6824 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6825 }
6826
/*
 * Step 5: the plane is unsupported when even the minimum swaths exceed half
 * of actDETBufferSizeInKByte * 1024, or when the luma swath width (or the
 * chroma one, if a chroma swath exists) exceeds its stated maximum.
 */
6827 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6828 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6829 *ViewportSizeSupport = false;
6830 ViewportSizeSupportPerPlane[k] = false;
6831 } else {
6832 ViewportSizeSupportPerPlane[k] = true;
6833 }
6834 }
6835 }
6836 }
6837
6838 static void CalculateSwathWidth(
6839 bool ForceSingleDPP,
6840 int NumberOfActivePlanes,
6841 enum source_format_class SourcePixelFormat[],
6842 enum scan_direction_class SourceScan[],
6843 int ViewportWidth[],
6844 int ViewportHeight[],
6845 int SurfaceWidthY[],
6846 int SurfaceWidthC[],
6847 int SurfaceHeightY[],
6848 int SurfaceHeightC[],
6849 enum odm_combine_mode ODMCombineEnabled[],
6850 int BytePerPixY[],
6851 int BytePerPixC[],
6852 int Read256BytesBlockHeightY[],
6853 int Read256BytesBlockHeightC[],
6854 int Read256BytesBlockWidthY[],
6855 int Read256BytesBlockWidthC[],
6856 int BlendingAndTiming[],
6857 int HActive[],
6858 double HRatio[],
6859 int DPPPerPlane[],
6860 double SwathWidthSingleDPPY[],
6861 double SwathWidthSingleDPPC[],
6862 double SwathWidthY[],
6863 double SwathWidthC[],
6864 int MaximumSwathHeightY[],
6865 int MaximumSwathHeightC[],
6866 int swath_width_luma_ub[],
6867 int swath_width_chroma_ub[])
6868 {
6869 enum odm_combine_mode MainPlaneODMCombine;
6870 int j, k;
6871
6872 #ifdef __DML_VBA_DEBUG__
6873 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6874 #endif
6875
6876 for (k = 0; k < NumberOfActivePlanes; ++k) {
6877 if (SourceScan[k] != dm_vert) {
6878 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6879 } else {
6880 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6881 }
6882
6883 #ifdef __DML_VBA_DEBUG__
6884 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6885 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6886 #endif
6887
6888 MainPlaneODMCombine = ODMCombineEnabled[k];
6889 for (j = 0; j < NumberOfActivePlanes; ++j) {
6890 if (BlendingAndTiming[k] == j) {
6891 MainPlaneODMCombine = ODMCombineEnabled[j];
6892 }
6893 }
6894
6895 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
6896 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6897 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
6898 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6899 else if (DPPPerPlane[k] == 2)
6900 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6901 else
6902 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6903
6904 #ifdef __DML_VBA_DEBUG__
6905 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6906 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6907 #endif
6908
6909 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6910 SwathWidthC[k] = SwathWidthY[k] / 2;
6911 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6912 } else {
6913 SwathWidthC[k] = SwathWidthY[k];
6914 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6915 }
6916
6917 if (ForceSingleDPP == true) {
6918 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6919 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6920 }
6921 {
6922 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6923 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6924
6925 #ifdef __DML_VBA_DEBUG__
6926 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6927 #endif
6928
6929 if (SourceScan[k] != dm_vert) {
6930 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6931 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6932 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6933 if (BytePerPixC[k] > 0) {
6934 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6935
6936 swath_width_chroma_ub[k] = dml_min(
6937 surface_width_ub_c,
6938 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6939 } else {
6940 swath_width_chroma_ub[k] = 0;
6941 }
6942 } else {
6943 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6944 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6945 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6946 if (BytePerPixC[k] > 0) {
6947 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6948
6949 swath_width_chroma_ub[k] = dml_min(
6950 surface_height_ub_c,
6951 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6952 } else {
6953 swath_width_chroma_ub[k] = 0;
6954 }
6955 }
6956 }
6957 }
6958 }
6959
6960 static double CalculateExtraLatency(
6961 int RoundTripPingLatencyCycles,
6962 int ReorderingBytes,
6963 double DCFCLK,
6964 int TotalNumberOfActiveDPP,
6965 int PixelChunkSizeInKByte,
6966 int TotalNumberOfDCCActiveDPP,
6967 int MetaChunkSize,
6968 double ReturnBW,
6969 bool GPUVMEnable,
6970 bool HostVMEnable,
6971 int NumberOfActivePlanes,
6972 int NumberOfDPP[],
6973 int dpte_group_bytes[],
6974 double HostVMInefficiencyFactor,
6975 double HostVMMinPageSize,
6976 int HostVMMaxNonCachedPageTableLevels)
6977 {
6978 double ExtraLatencyBytes;
6979 double ExtraLatency;
6980
6981 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6982 ReorderingBytes,
6983 TotalNumberOfActiveDPP,
6984 PixelChunkSizeInKByte,
6985 TotalNumberOfDCCActiveDPP,
6986 MetaChunkSize,
6987 GPUVMEnable,
6988 HostVMEnable,
6989 NumberOfActivePlanes,
6990 NumberOfDPP,
6991 dpte_group_bytes,
6992 HostVMInefficiencyFactor,
6993 HostVMMinPageSize,
6994 HostVMMaxNonCachedPageTableLevels);
6995
6996 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6997
6998 #ifdef __DML_VBA_DEBUG__
6999 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7000 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7001 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7002 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7003 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7004 #endif
7005
7006 return ExtraLatency;
7007 }
7008
/*
 * CalculateExtraLatencyBytes() - byte count that feeds the extra latency.
 *
 * The base amount is ReorderingBytes plus 1024 bytes per KByte of
 *   TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *   + TotalNumberOfDCCActiveDPP * MetaChunkSize.
 * When GPUVMEnable is set, every plane additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels)
 *   * HostVMInefficiencyFactor,
 * where "levels" is 0 unless both GPUVMEnable and HostVMEnable are set. In
 * that case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (clamped at 0 for the reduced cases) depending on HostVMMinPageSize.
 *
 * Returns the total as a double; no argument is modified.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int DynamicLevels = 0;
	double TotalBytes;
	int plane;

	if (GPUVMEnable && HostVMEnable) {
		/* Thresholds on HostVMMinPageSize: below 2048, below 1048576, otherwise. */
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no per-plane dpte contribution. */
	if (!GPUVMEnable)
		return TotalBytes;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		TotalBytes = TotalBytes
				+ NumberOfDPP[plane] * dpte_group_bytes[plane] * (1 + 8 * DynamicLevels) * HostVMInefficiencyFactor;

	return TotalBytes;
}
7046
/*
 * CalculateUrgentLatency() - largest of the three urgent latencies, with an
 * optional fabric-clock adjustment.
 *
 * Starts from dml_max3() of the pixel-only, pixel-mixed-with-VM and VM-only
 * latencies. When DoUrgentLatencyAdjustment is set, adds
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * Returns the resulting latency; no argument is modified.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstLatency = dml_max3(
			UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstLatency;

	return WorstLatency
			+ UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7063
7064 static void UseMinimumDCFCLK(
7065 struct display_mode_lib *mode_lib,
7066 int MaxPrefetchMode,
7067 int ReorderingBytes)
7068 {
7069 struct vba_vars_st *v = &mode_lib->vba;
7070 int dummy1, i, j, k;
7071 double NormalEfficiency, dummy2, dummy3;
7072 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7073
7074 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7075 for (i = 0; i < v->soc.num_states; ++i) {
7076 for (j = 0; j <= 1; ++j) {
7077 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7078 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7079 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7080 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7081 double MinimumTWait;
7082 double NonDPTEBandwidth;
7083 double DPTEBandwidth;
7084 double DCFCLKRequiredForAverageBandwidth;
7085 double ExtraLatencyBytes;
7086 double ExtraLatencyCycles;
7087 double DCFCLKRequiredForPeakBandwidth;
7088 int NoOfDPPState[DC__NUM_DPP__MAX];
7089 double MinimumTvmPlus2Tr0;
7090
7091 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7092 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7093 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7094 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7095 }
7096
7097 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7098 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7099
7100 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7101 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7102 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7103 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7104 DCFCLKRequiredForAverageBandwidth = dml_max3(
7105 v->ProjectedDCFCLKDeepSleep[i][j],
7106 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7107 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7108 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7109
7110 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7111 ReorderingBytes,
7112 v->TotalNumberOfActiveDPP[i][j],
7113 v->PixelChunkSizeInKByte,
7114 v->TotalNumberOfDCCActiveDPP[i][j],
7115 v->MetaChunkSize,
7116 v->GPUVMEnable,
7117 v->HostVMEnable,
7118 v->NumberOfActivePlanes,
7119 NoOfDPPState,
7120 v->dpte_group_bytes,
7121 1,
7122 v->HostVMMinPageSize,
7123 v->HostVMMaxNonCachedPageTableLevels);
7124 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7125 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7126 double DCFCLKCyclesRequiredInPrefetch;
7127 double ExpectedPrefetchBWAcceleration;
7128 double PrefetchTime;
7129
7130 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7131 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7132 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7133 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7134 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7135 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7136 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7137 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7138 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7139 DynamicMetadataVMExtraLatency[k] =
7140 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7141 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7142 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7143 - v->UrgLatency[i]
7144 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7145 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7146 - DynamicMetadataVMExtraLatency[k];
7147
7148 if (PrefetchTime > 0) {
7149 double ExpectedVRatioPrefetch;
7150
7151 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7152 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7153 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7154 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7155 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7156 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7157 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7158 }
7159 } else {
7160 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7161 }
7162 if (v->DynamicMetadataEnable[k] == true) {
7163 double TSetupPipe;
7164 double TdmbfPipe;
7165 double TdmsksPipe;
7166 double TdmecPipe;
7167 double AllowedTimeForUrgentExtraLatency;
7168
7169 CalculateVupdateAndDynamicMetadataParameters(
7170 v->MaxInterDCNTileRepeaters,
7171 v->RequiredDPPCLK[i][j][k],
7172 v->RequiredDISPCLK[i][j],
7173 v->ProjectedDCFCLKDeepSleep[i][j],
7174 v->PixelClock[k],
7175 v->HTotal[k],
7176 v->VTotal[k] - v->VActive[k],
7177 v->DynamicMetadataTransmittedBytes[k],
7178 v->DynamicMetadataLinesBeforeActiveRequired[k],
7179 v->Interlace[k],
7180 v->ProgressiveToInterlaceUnitInOPP,
7181 &TSetupPipe,
7182 &TdmbfPipe,
7183 &TdmecPipe,
7184 &TdmsksPipe,
7185 &dummy1,
7186 &dummy2,
7187 &dummy3);
7188 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7189 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7190 if (AllowedTimeForUrgentExtraLatency > 0) {
7191 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7192 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7193 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7194 } else {
7195 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7196 }
7197 }
7198 }
7199 DCFCLKRequiredForPeakBandwidth = 0;
7200 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7201 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7202
7203 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7204 * (v->GPUVMEnable == true ?
7205 (v->HostVMEnable == true ?
7206 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7207 0);
7208 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7209 double MaximumTvmPlus2Tr0PlusTsw;
7210
7211 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7212 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7213 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7214 } else {
7215 DCFCLKRequiredForPeakBandwidth = dml_max3(
7216 DCFCLKRequiredForPeakBandwidth,
7217 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7218 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7219 }
7220 }
7221 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7222 }
7223 }
7224 }
7225
7226 static void CalculateUnboundedRequestAndCompressedBufferSize(
7227 unsigned int DETBufferSizeInKByte,
7228 int ConfigReturnBufferSizeInKByte,
7229 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7230 int TotalActiveDPP,
7231 bool NoChromaPlanes,
7232 int MaxNumDPP,
7233 int CompressedBufferSegmentSizeInkByteFinal,
7234 enum output_encoder_class *Output,
7235 bool *UnboundedRequestEnabled,
7236 int *CompressedBufferSizeInkByte)
7237 {
7238 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7239
7240 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7241 *CompressedBufferSizeInkByte = (
7242 *UnboundedRequestEnabled == true ?
7243 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7244 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7245 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7246
7247 #ifdef __DML_VBA_DEBUG__
7248 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7249 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7250 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7251 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7252 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7253 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7254 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7255 #endif
7256 }
7257
7258 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7259 {
7260 bool ret_val = false;
7261
7262 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7263 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
7264 ret_val = false;
7265 return ret_val;
7266 }
7267
/*
 * Compute the maximum VStartup value, in lines, for one timing.
 *
 * The usable vblank is the smaller of the actual vblank (VTotal - VActive)
 * and the nominal vblank. The nominal vblank is VBlankNom when non-zero,
 * otherwise VBlankNomDefaultUS converted to lines using the line time
 * (HTotal / PixelClock). VBlankNom is taken as given; it is not limited
 * against the default.
 *
 * For interlaced timings not handled by the progressive-to-interlace unit
 * in OPP, the result is half the usable vblank (floored). Otherwise the
 * writeback delay expressed in lines (at least one line) is subtracted.
 *
 * The returned value is capped at 1023.
 */
static unsigned int CalculateMaxVStartup(
		unsigned int VTotal,
		unsigned int VActive,
		unsigned int VBlankNom,
		unsigned int HTotal,
		double PixelClock,
		bool ProgressiveTointerlaceUnitinOPP,
		bool Interlace,
		unsigned int VBlankNomDefaultUS,
		double WritebackDelayTime)
{
	const double line_time_us = HTotal / PixelClock;
	const unsigned int actual_lines = VTotal - VActive;
	/* Evaluated unconditionally, even when VBlankNom overrides it below. */
	const unsigned int default_nom_lines = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0);
	unsigned int avail_lines;
	unsigned int usable_lines;
	unsigned int MaxVStartup;

	if (VBlankNom == 0)
		avail_lines = default_nom_lines;
	else
		avail_lines = VBlankNom;

	usable_lines = (unsigned int) dml_min(actual_lines, avail_lines);

	if (Interlace && !ProgressiveTointerlaceUnitinOPP) {
		MaxVStartup = dml_floor(usable_lines / 2.0, 1.0);
	} else {
		/* Writeback delay in whole lines, never less than one line. */
		double writeback_lines = dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0));

		/* Subtraction happens in double, then converts to unsigned. */
		MaxVStartup = usable_lines - writeback_lines;
	}

	return MaxVStartup > 1023 ? 1023 : MaxVStartup;
}
7296