1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #ifdef CONFIG_DRM_AMD_DC_DCN
27 #include "dc.h"
28 #include "dc_link.h"
29 #include "../display_mode_lib.h"
30 #include "display_mode_vba_30.h"
31 #include "../dml_inline_defs.h"
32
33
34 /*
35 * NOTE:
36 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
37 *
38 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
39 * ways. Unless there is something clearly wrong with it the code should
40 * remain as-is as it provides us with a guarantee from HW that it is correct.
41 */
42
43
/*
 * Pipe - bundle of per-pipe values handed to a callee as one pointer.
 *
 * The only use visible in this part of the file is the "myPipe" argument of
 * CalculatePrefetchSchedule().
 *
 * NOTE(review): units and valid ranges of the fields are not visible here;
 * confirm them against the code that fills the struct in.
 * NOTE(review): InterlaceEnable and DCCEnable are unsigned int rather than
 * bool, and ODMCombineEnabled is a plain bool here although the swath helpers
 * declared below take "enum odm_combine_mode ODMCombineEnabled[]" --
 * presumably only on/off matters to the consumer of this struct; confirm.
 */
typedef struct {
	double DPPCLK;
	double DISPCLK;
	double PixelClock;
	double DCFCLKDeepSleep;
	unsigned int DPPPerPlane;
	bool ScalerEnabled;
	enum scan_direction_class SourceScan;
	// 256-byte block dimensions, kept separately for the Y and C planes
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	unsigned int InterlaceEnable;
	unsigned int NumberOfCursors;
	unsigned int VBlank;
	unsigned int HTotal;
	unsigned int DCCEnable;
	bool ODMCombineEnabled;
} Pipe;
63
// Special bits-per-pixel marker values.
// NOTE(review): the use sites are outside this part of the file; presumably
// these are sentinels for "no valid BPP" and "pipe is blended" -- confirm.
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
// Same numbers as the 5184 / 4096 (420 mode) slice-width limits quoted in the
// comment inside dscceComputeDelay(); presumably those limits -- confirm at
// the use sites.
#define DCN30_MAX_DSC_IMAGE_WIDTH 5184
#define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
68
69 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
70 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
71 struct display_mode_lib *mode_lib);
72 static unsigned int dscceComputeDelay(
73 unsigned int bpc,
74 double BPP,
75 unsigned int sliceWidth,
76 unsigned int numSlices,
77 enum output_format_class pixelFormat,
78 enum output_encoder_class Output);
79 static unsigned int dscComputeDelay(
80 enum output_format_class pixelFormat,
81 enum output_encoder_class Output);
// Super monster function with 64 parameters
83 static bool CalculatePrefetchSchedule(
84 struct display_mode_lib *mode_lib,
85 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
86 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
87 Pipe *myPipe,
88 unsigned int DSCDelay,
89 double DPPCLKDelaySubtotalPlusCNVCFormater,
90 double DPPCLKDelaySCL,
91 double DPPCLKDelaySCLLBOnly,
92 double DPPCLKDelayCNVCCursor,
93 double DISPCLKDelaySubtotal,
94 unsigned int DPP_RECOUT_WIDTH,
95 enum output_format_class OutputFormat,
96 unsigned int MaxInterDCNTileRepeaters,
97 unsigned int VStartup,
98 unsigned int MaxVStartup,
99 unsigned int GPUVMPageTableLevels,
100 bool GPUVMEnable,
101 bool HostVMEnable,
102 unsigned int HostVMMaxNonCachedPageTableLevels,
103 double HostVMMinPageSize,
104 bool DynamicMetadataEnable,
105 bool DynamicMetadataVMEnabled,
106 int DynamicMetadataLinesBeforeActiveRequired,
107 unsigned int DynamicMetadataTransmittedBytes,
108 double UrgentLatency,
109 double UrgentExtraLatency,
110 double TCalc,
111 unsigned int PDEAndMetaPTEBytesFrame,
112 unsigned int MetaRowByte,
113 unsigned int PixelPTEBytesPerRow,
114 double PrefetchSourceLinesY,
115 unsigned int SwathWidthY,
116 int BytePerPixelY,
117 double VInitPreFillY,
118 unsigned int MaxNumSwathY,
119 double PrefetchSourceLinesC,
120 unsigned int SwathWidthC,
121 int BytePerPixelC,
122 double VInitPreFillC,
123 unsigned int MaxNumSwathC,
124 long swath_width_luma_ub,
125 long swath_width_chroma_ub,
126 unsigned int SwathHeightY,
127 unsigned int SwathHeightC,
128 double TWait,
129 bool ProgressiveToInterlaceUnitInOPP,
130 double *DSTXAfterScaler,
131 double *DSTYAfterScaler,
132 double *DestinationLinesForPrefetch,
133 double *PrefetchBandwidth,
134 double *DestinationLinesToRequestVMInVBlank,
135 double *DestinationLinesToRequestRowInVBlank,
136 double *VRatioPrefetchY,
137 double *VRatioPrefetchC,
138 double *RequiredPrefetchPixDataBWLuma,
139 double *RequiredPrefetchPixDataBWChroma,
140 bool *NotEnoughTimeForDynamicMetadata,
141 double *Tno_bw,
142 double *prefetch_vmrow_bw,
143 double *Tdmdl_vm,
144 double *Tdmdl,
145 unsigned int *VUpdateOffsetPix,
146 double *VUpdateWidthPix,
147 double *VReadyOffsetPix);
148 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
149 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
150 static void CalculateDCCConfiguration(
151 bool DCCEnabled,
152 bool DCCProgrammingAssumesScanDirectionUnknown,
153 enum source_format_class SourcePixelFormat,
154 unsigned int ViewportWidthLuma,
155 unsigned int ViewportWidthChroma,
156 unsigned int ViewportHeightLuma,
157 unsigned int ViewportHeightChroma,
158 double DETBufferSize,
159 unsigned int RequestHeight256ByteLuma,
160 unsigned int RequestHeight256ByteChroma,
161 enum dm_swizzle_mode TilingFormat,
162 unsigned int BytePerPixelY,
163 unsigned int BytePerPixelC,
164 double BytePerPixelDETY,
165 double BytePerPixelDETC,
166 enum scan_direction_class ScanOrientation,
167 unsigned int *MaxUncompressedBlockLuma,
168 unsigned int *MaxUncompressedBlockChroma,
169 unsigned int *MaxCompressedBlockLuma,
170 unsigned int *MaxCompressedBlockChroma,
171 unsigned int *IndependentBlockLuma,
172 unsigned int *IndependentBlockChroma);
173 static double CalculatePrefetchSourceLines(
174 struct display_mode_lib *mode_lib,
175 double VRatio,
176 double vtaps,
177 bool Interlace,
178 bool ProgressiveToInterlaceUnitInOPP,
179 unsigned int SwathHeight,
180 unsigned int ViewportYStart,
181 double *VInitPreFill,
182 unsigned int *MaxNumSwath);
183 static unsigned int CalculateVMAndRowBytes(
184 struct display_mode_lib *mode_lib,
185 bool DCCEnable,
186 unsigned int BlockHeight256Bytes,
187 unsigned int BlockWidth256Bytes,
188 enum source_format_class SourcePixelFormat,
189 unsigned int SurfaceTiling,
190 unsigned int BytePerPixel,
191 enum scan_direction_class ScanDirection,
192 unsigned int SwathWidth,
193 unsigned int ViewportHeight,
194 bool GPUVMEnable,
195 bool HostVMEnable,
196 unsigned int HostVMMaxNonCachedPageTableLevels,
197 unsigned int GPUVMMinPageSize,
198 unsigned int HostVMMinPageSize,
199 unsigned int PTEBufferSizeInRequests,
200 unsigned int Pitch,
201 unsigned int DCCMetaPitch,
202 unsigned int *MacroTileWidth,
203 unsigned int *MetaRowByte,
204 unsigned int *PixelPTEBytesPerRow,
205 bool *PTEBufferSizeNotExceeded,
206 unsigned int *dpte_row_width_ub,
207 unsigned int *dpte_row_height,
208 unsigned int *MetaRequestWidth,
209 unsigned int *MetaRequestHeight,
210 unsigned int *meta_row_width,
211 unsigned int *meta_row_height,
212 unsigned int *vm_group_bytes,
213 unsigned int *dpte_group_bytes,
214 unsigned int *PixelPTEReqWidth,
215 unsigned int *PixelPTEReqHeight,
216 unsigned int *PTERequestSize,
217 unsigned int *DPDE0BytesFrame,
218 unsigned int *MetaPTEBytesFrame);
219 static double CalculateTWait(
220 unsigned int PrefetchMode,
221 double DRAMClockChangeLatency,
222 double UrgentLatency,
223 double SREnterPlusExitTime);
224 static void CalculateRowBandwidth(
225 bool GPUVMEnable,
226 enum source_format_class SourcePixelFormat,
227 double VRatio,
228 double VRatioChroma,
229 bool DCCEnable,
230 double LineTime,
231 unsigned int MetaRowByteLuma,
232 unsigned int MetaRowByteChroma,
233 unsigned int meta_row_height_luma,
234 unsigned int meta_row_height_chroma,
235 unsigned int PixelPTEBytesPerRowLuma,
236 unsigned int PixelPTEBytesPerRowChroma,
237 unsigned int dpte_row_height_luma,
238 unsigned int dpte_row_height_chroma,
239 double *meta_row_bw,
240 double *dpte_row_bw);
241 static void CalculateFlipSchedule(
242 struct display_mode_lib *mode_lib,
243 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
244 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
245 double UrgentExtraLatency,
246 double UrgentLatency,
247 unsigned int GPUVMMaxPageTableLevels,
248 bool HostVMEnable,
249 unsigned int HostVMMaxNonCachedPageTableLevels,
250 bool GPUVMEnable,
251 double HostVMMinPageSize,
252 double PDEAndMetaPTEBytesPerFrame,
253 double MetaRowBytes,
254 double DPTEBytesPerRow,
255 double BandwidthAvailableForImmediateFlip,
256 unsigned int TotImmediateFlipBytes,
257 enum source_format_class SourcePixelFormat,
258 double LineTime,
259 double VRatio,
260 double VRatioChroma,
261 double Tno_bw,
262 bool DCCEnable,
263 unsigned int dpte_row_height,
264 unsigned int meta_row_height,
265 unsigned int dpte_row_height_chroma,
266 unsigned int meta_row_height_chroma,
267 double *DestinationLinesToRequestVMInImmediateFlip,
268 double *DestinationLinesToRequestRowInImmediateFlip,
269 double *final_flip_bw,
270 bool *ImmediateFlipSupportedForPipe);
271 static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 long WritebackDestinationWidth,
277 long WritebackDestinationHeight,
278 long WritebackSourceHeight,
279 unsigned int HTotal);
280 static void CalculateDynamicMetadataParameters(
281 int MaxInterDCNTileRepeaters,
282 double DPPCLK,
283 double DISPCLK,
284 double DCFClkDeepSleep,
285 double PixelClock,
286 long HTotal,
287 long VBlank,
288 long DynamicMetadataTransmittedBytes,
289 long DynamicMetadataLinesBeforeActiveRequired,
290 int InterlaceEnable,
291 bool ProgressiveToInterlaceUnitInOPP,
292 double *Tsetup,
293 double *Tdmbf,
294 double *Tdmec,
295 double *Tdmsks);
296 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
297 struct display_mode_lib *mode_lib,
298 unsigned int PrefetchMode,
299 unsigned int NumberOfActivePlanes,
300 unsigned int MaxLineBufferLines,
301 unsigned int LineBufferSize,
302 unsigned int DPPOutputBufferPixels,
303 unsigned int DETBufferSizeInKByte,
304 unsigned int WritebackInterfaceBufferSize,
305 double DCFCLK,
306 double ReturnBW,
307 bool GPUVMEnable,
308 unsigned int dpte_group_bytes[],
309 unsigned int MetaChunkSize,
310 double UrgentLatency,
311 double ExtraLatency,
312 double WritebackLatency,
313 double WritebackChunkSize,
314 double SOCCLK,
315 double DRAMClockChangeLatency,
316 double SRExitTime,
317 double SREnterPlusExitTime,
318 double DCFCLKDeepSleep,
319 unsigned int DPPPerPlane[],
320 bool DCCEnable[],
321 double DPPCLK[],
322 unsigned int DETBufferSizeY[],
323 unsigned int DETBufferSizeC[],
324 unsigned int SwathHeightY[],
325 unsigned int SwathHeightC[],
326 unsigned int LBBitPerPixel[],
327 double SwathWidthY[],
328 double SwathWidthC[],
329 double HRatio[],
330 double HRatioChroma[],
331 unsigned int vtaps[],
332 unsigned int VTAPsChroma[],
333 double VRatio[],
334 double VRatioChroma[],
335 unsigned int HTotal[],
336 double PixelClock[],
337 unsigned int BlendingAndTiming[],
338 double BytePerPixelDETY[],
339 double BytePerPixelDETC[],
340 double DSTXAfterScaler[],
341 double DSTYAfterScaler[],
342 bool WritebackEnable[],
343 enum source_format_class WritebackPixelFormat[],
344 double WritebackDestinationWidth[],
345 double WritebackDestinationHeight[],
346 double WritebackSourceHeight[],
347 enum clock_change_support *DRAMClockChangeSupport,
348 double *UrgentWatermark,
349 double *WritebackUrgentWatermark,
350 double *DRAMClockChangeWatermark,
351 double *WritebackDRAMClockChangeWatermark,
352 double *StutterExitWatermark,
353 double *StutterEnterPlusExitWatermark,
354 double *MinActiveDRAMClockChangeLatencySupported);
355 static void CalculateDCFCLKDeepSleep(
356 struct display_mode_lib *mode_lib,
357 unsigned int NumberOfActivePlanes,
358 int BytePerPixelY[],
359 int BytePerPixelC[],
360 double VRatio[],
361 double VRatioChroma[],
362 double SwathWidthY[],
363 double SwathWidthC[],
364 unsigned int DPPPerPlane[],
365 double HRatio[],
366 double HRatioChroma[],
367 double PixelClock[],
368 double PSCL_THROUGHPUT[],
369 double PSCL_THROUGHPUT_CHROMA[],
370 double DPPCLK[],
371 double ReadBandwidthLuma[],
372 double ReadBandwidthChroma[],
373 int ReturnBusWidth,
374 double *DCFCLKDeepSleep);
375 static void CalculateUrgentBurstFactor(
376 long swath_width_luma_ub,
377 long swath_width_chroma_ub,
378 unsigned int DETBufferSizeInKByte,
379 unsigned int SwathHeightY,
380 unsigned int SwathHeightC,
381 double LineTime,
382 double UrgentLatency,
383 double CursorBufferSize,
384 unsigned int CursorWidth,
385 unsigned int CursorBPP,
386 double VRatio,
387 double VRatioC,
388 double BytePerPixelInDETY,
389 double BytePerPixelInDETC,
390 double DETBufferSizeY,
391 double DETBufferSizeC,
392 double *UrgentBurstFactorCursor,
393 double *UrgentBurstFactorLuma,
394 double *UrgentBurstFactorChroma,
395 bool *NotEnoughUrgentLatencyHiding);
396
397 static void UseMinimumDCFCLK(
398 struct display_mode_lib *mode_lib,
399 int MaxInterDCNTileRepeaters,
400 int MaxPrefetchMode,
401 double FinalDRAMClockChangeLatency,
402 double SREnterPlusExitTime,
403 int ReturnBusWidth,
404 int RoundTripPingLatencyCycles,
405 int ReorderingBytes,
406 int PixelChunkSizeInKByte,
407 int MetaChunkSize,
408 bool GPUVMEnable,
409 int GPUVMMaxPageTableLevels,
410 bool HostVMEnable,
411 int NumberOfActivePlanes,
412 double HostVMMinPageSize,
413 int HostVMMaxNonCachedPageTableLevels,
414 bool DynamicMetadataVMEnabled,
415 enum immediate_flip_requirement ImmediateFlipRequirement,
416 bool ProgressiveToInterlaceUnitInOPP,
417 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
418 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
419 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
420 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
421 int VTotal[],
422 int VActive[],
423 int DynamicMetadataTransmittedBytes[],
424 int DynamicMetadataLinesBeforeActiveRequired[],
425 bool Interlace[],
426 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
427 double RequiredDISPCLK[][2],
428 double UrgLatency[],
429 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
430 double ProjectedDCFCLKDeepSleep[][2],
431 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
432 double TotalVActivePixelBandwidth[][2],
433 double TotalVActiveCursorBandwidth[][2],
434 double TotalMetaRowBandwidth[][2],
435 double TotalDPTERowBandwidth[][2],
436 unsigned int TotalNumberOfActiveDPP[][2],
437 unsigned int TotalNumberOfDCCActiveDPP[][2],
438 int dpte_group_bytes[],
439 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
440 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
441 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
442 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
443 int BytePerPixelY[],
444 int BytePerPixelC[],
445 int HTotal[],
446 double PixelClock[],
447 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
448 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
449 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
450 bool DynamicMetadataEnable[],
451 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
452 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
453 double ReadBandwidthLuma[],
454 double ReadBandwidthChroma[],
455 double DCFCLKPerState[],
456 double DCFCLKState[][2]);
457 static void CalculatePixelDeliveryTimes(
458 unsigned int NumberOfActivePlanes,
459 double VRatio[],
460 double VRatioChroma[],
461 double VRatioPrefetchY[],
462 double VRatioPrefetchC[],
463 unsigned int swath_width_luma_ub[],
464 unsigned int swath_width_chroma_ub[],
465 unsigned int DPPPerPlane[],
466 double HRatio[],
467 double HRatioChroma[],
468 double PixelClock[],
469 double PSCL_THROUGHPUT[],
470 double PSCL_THROUGHPUT_CHROMA[],
471 double DPPCLK[],
472 int BytePerPixelC[],
473 enum scan_direction_class SourceScan[],
474 unsigned int NumberOfCursors[],
475 unsigned int CursorWidth[][2],
476 unsigned int CursorBPP[][2],
477 unsigned int BlockWidth256BytesY[],
478 unsigned int BlockHeight256BytesY[],
479 unsigned int BlockWidth256BytesC[],
480 unsigned int BlockHeight256BytesC[],
481 double DisplayPipeLineDeliveryTimeLuma[],
482 double DisplayPipeLineDeliveryTimeChroma[],
483 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
484 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
485 double DisplayPipeRequestDeliveryTimeLuma[],
486 double DisplayPipeRequestDeliveryTimeChroma[],
487 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
488 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
489 double CursorRequestDeliveryTime[],
490 double CursorRequestDeliveryTimePrefetch[]);
491
492 static void CalculateMetaAndPTETimes(
493 int NumberOfActivePlanes,
494 bool GPUVMEnable,
495 int MetaChunkSize,
496 int MinMetaChunkSizeBytes,
497 int HTotal[],
498 double VRatio[],
499 double VRatioChroma[],
500 double DestinationLinesToRequestRowInVBlank[],
501 double DestinationLinesToRequestRowInImmediateFlip[],
502 bool DCCEnable[],
503 double PixelClock[],
504 int BytePerPixelY[],
505 int BytePerPixelC[],
506 enum scan_direction_class SourceScan[],
507 int dpte_row_height[],
508 int dpte_row_height_chroma[],
509 int meta_row_width[],
510 int meta_row_width_chroma[],
511 int meta_row_height[],
512 int meta_row_height_chroma[],
513 int meta_req_width[],
514 int meta_req_width_chroma[],
515 int meta_req_height[],
516 int meta_req_height_chroma[],
517 int dpte_group_bytes[],
518 int PTERequestSizeY[],
519 int PTERequestSizeC[],
520 int PixelPTEReqWidthY[],
521 int PixelPTEReqHeightY[],
522 int PixelPTEReqWidthC[],
523 int PixelPTEReqHeightC[],
524 int dpte_row_width_luma_ub[],
525 int dpte_row_width_chroma_ub[],
526 double DST_Y_PER_PTE_ROW_NOM_L[],
527 double DST_Y_PER_PTE_ROW_NOM_C[],
528 double DST_Y_PER_META_ROW_NOM_L[],
529 double DST_Y_PER_META_ROW_NOM_C[],
530 double TimePerMetaChunkNominal[],
531 double TimePerChromaMetaChunkNominal[],
532 double TimePerMetaChunkVBlank[],
533 double TimePerChromaMetaChunkVBlank[],
534 double TimePerMetaChunkFlip[],
535 double TimePerChromaMetaChunkFlip[],
536 double time_per_pte_group_nom_luma[],
537 double time_per_pte_group_vblank_luma[],
538 double time_per_pte_group_flip_luma[],
539 double time_per_pte_group_nom_chroma[],
540 double time_per_pte_group_vblank_chroma[],
541 double time_per_pte_group_flip_chroma[]);
542
543 static void CalculateVMGroupAndRequestTimes(
544 unsigned int NumberOfActivePlanes,
545 bool GPUVMEnable,
546 unsigned int GPUVMMaxPageTableLevels,
547 unsigned int HTotal[],
548 int BytePerPixelC[],
549 double DestinationLinesToRequestVMInVBlank[],
550 double DestinationLinesToRequestVMInImmediateFlip[],
551 bool DCCEnable[],
552 double PixelClock[],
553 int dpte_row_width_luma_ub[],
554 int dpte_row_width_chroma_ub[],
555 int vm_group_bytes[],
556 unsigned int dpde0_bytes_per_frame_ub_l[],
557 unsigned int dpde0_bytes_per_frame_ub_c[],
558 int meta_pte_bytes_per_frame_ub_l[],
559 int meta_pte_bytes_per_frame_ub_c[],
560 double TimePerVMGroupVBlank[],
561 double TimePerVMGroupFlip[],
562 double TimePerVMRequestVBlank[],
563 double TimePerVMRequestFlip[]);
564
565 static void CalculateStutterEfficiency(
566 int NumberOfActivePlanes,
567 long ROBBufferSizeInKByte,
568 double TotalDataReadBandwidth,
569 double DCFCLK,
570 double ReturnBW,
571 double SRExitTime,
572 bool SynchronizedVBlank,
573 int DPPPerPlane[],
574 unsigned int DETBufferSizeY[],
575 int BytePerPixelY[],
576 double BytePerPixelDETY[],
577 double SwathWidthY[],
578 int SwathHeightY[],
579 int SwathHeightC[],
580 double DCCRateLuma[],
581 double DCCRateChroma[],
582 int HTotal[],
583 int VTotal[],
584 double PixelClock[],
585 double VRatio[],
586 enum scan_direction_class SourceScan[],
587 int BlockHeight256BytesY[],
588 int BlockWidth256BytesY[],
589 int BlockHeight256BytesC[],
590 int BlockWidth256BytesC[],
591 int DCCYMaxUncompressedBlock[],
592 int DCCCMaxUncompressedBlock[],
593 int VActive[],
594 bool DCCEnable[],
595 bool WritebackEnable[],
596 double ReadBandwidthPlaneLuma[],
597 double ReadBandwidthPlaneChroma[],
598 double meta_row_bw[],
599 double dpte_row_bw[],
600 double *StutterEfficiencyNotIncludingVBlank,
601 double *StutterEfficiency,
602 double *StutterPeriodOut);
603
604 static void CalculateSwathAndDETConfiguration(
605 bool ForceSingleDPP,
606 int NumberOfActivePlanes,
607 unsigned int DETBufferSizeInKByte,
608 double MaximumSwathWidthLuma[],
609 double MaximumSwathWidthChroma[],
610 enum scan_direction_class SourceScan[],
611 enum source_format_class SourcePixelFormat[],
612 enum dm_swizzle_mode SurfaceTiling[],
613 int ViewportWidth[],
614 int ViewportHeight[],
615 int SurfaceWidthY[],
616 int SurfaceWidthC[],
617 int SurfaceHeightY[],
618 int SurfaceHeightC[],
619 int Read256BytesBlockHeightY[],
620 int Read256BytesBlockHeightC[],
621 int Read256BytesBlockWidthY[],
622 int Read256BytesBlockWidthC[],
623 enum odm_combine_mode ODMCombineEnabled[],
624 int BlendingAndTiming[],
625 int BytePerPixY[],
626 int BytePerPixC[],
627 double BytePerPixDETY[],
628 double BytePerPixDETC[],
629 int HActive[],
630 double HRatio[],
631 double HRatioChroma[],
632 int DPPPerPlane[],
633 int swath_width_luma_ub[],
634 int swath_width_chroma_ub[],
635 double SwathWidth[],
636 double SwathWidthChroma[],
637 int SwathHeightY[],
638 int SwathHeightC[],
639 unsigned int DETBufferSizeY[],
640 unsigned int DETBufferSizeC[],
641 bool ViewportSizeSupportPerPlane[],
642 bool *ViewportSizeSupport);
643 static void CalculateSwathWidth(
644 bool ForceSingleDPP,
645 int NumberOfActivePlanes,
646 enum source_format_class SourcePixelFormat[],
647 enum scan_direction_class SourceScan[],
648 unsigned int ViewportWidth[],
649 unsigned int ViewportHeight[],
650 unsigned int SurfaceWidthY[],
651 unsigned int SurfaceWidthC[],
652 unsigned int SurfaceHeightY[],
653 unsigned int SurfaceHeightC[],
654 enum odm_combine_mode ODMCombineEnabled[],
655 int BytePerPixY[],
656 int BytePerPixC[],
657 int Read256BytesBlockHeightY[],
658 int Read256BytesBlockHeightC[],
659 int Read256BytesBlockWidthY[],
660 int Read256BytesBlockWidthC[],
661 int BlendingAndTiming[],
662 unsigned int HActive[],
663 double HRatio[],
664 int DPPPerPlane[],
665 double SwathWidthSingleDPPY[],
666 double SwathWidthSingleDPPC[],
667 double SwathWidthY[],
668 double SwathWidthC[],
669 int MaximumSwathHeightY[],
670 int MaximumSwathHeightC[],
671 unsigned int swath_width_luma_ub[],
672 unsigned int swath_width_chroma_ub[]);
673 static double CalculateExtraLatency(
674 long RoundTripPingLatencyCycles,
675 long ReorderingBytes,
676 double DCFCLK,
677 int TotalNumberOfActiveDPP,
678 int PixelChunkSizeInKByte,
679 int TotalNumberOfDCCActiveDPP,
680 int MetaChunkSize,
681 double ReturnBW,
682 bool GPUVMEnable,
683 bool HostVMEnable,
684 int NumberOfActivePlanes,
685 int NumberOfDPP[],
686 int dpte_group_bytes[],
687 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
688 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
689 double HostVMMinPageSize,
690 int HostVMMaxNonCachedPageTableLevels);
691 static double CalculateExtraLatencyBytes(
692 long ReorderingBytes,
693 int TotalNumberOfActiveDPP,
694 int PixelChunkSizeInKByte,
695 int TotalNumberOfDCCActiveDPP,
696 int MetaChunkSize,
697 bool GPUVMEnable,
698 bool HostVMEnable,
699 int NumberOfActivePlanes,
700 int NumberOfDPP[],
701 int dpte_group_bytes[],
702 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
703 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
704 double HostVMMinPageSize,
705 int HostVMMaxNonCachedPageTableLevels);
706 static double CalculateUrgentLatency(
707 double UrgentLatencyPixelDataOnly,
708 double UrgentLatencyPixelMixedWithVMData,
709 double UrgentLatencyVMDataOnly,
710 bool DoUrgentLatencyAdjustment,
711 double UrgentLatencyAdjustmentFabricClockComponent,
712 double UrgentLatencyAdjustmentFabricClockReference,
713 double FabricClockSingle);
714
715 static bool CalculateBytePerPixelAnd256BBlockSizes(
716 enum source_format_class SourcePixelFormat,
717 enum dm_swizzle_mode SurfaceTiling,
718 unsigned int *BytePerPixelY,
719 unsigned int *BytePerPixelC,
720 double *BytePerPixelDETY,
721 double *BytePerPixelDETC,
722 unsigned int *BlockHeight256BytesY,
723 unsigned int *BlockHeight256BytesC,
724 unsigned int *BlockWidth256BytesY,
725 unsigned int *BlockWidth256BytesC);
726
/*
 * dml30_recalculate - run the full calculation sequence on mode_lib.
 *
 * Calls four stages in a fixed order, each with the same mode_lib pointer and
 * nothing else. Nothing is returned, so results are presumably left in
 * *mode_lib by the stages themselves (their bodies are not in view here).
 *
 * The first two stages are not declared in this file's visible part
 * (presumably they come from the included display mode headers); the last two
 * are the static functions forward-declared near the top of this file.
 */
void dml30_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
734
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)735 static unsigned int dscceComputeDelay(
736 unsigned int bpc,
737 double BPP,
738 unsigned int sliceWidth,
739 unsigned int numSlices,
740 enum output_format_class pixelFormat,
741 enum output_encoder_class Output)
742 {
743 // valid bpc = source bits per component in the set of {8, 10, 12}
744 // valid bpp = increments of 1/16 of a bit
745 // min = 6/7/8 in N420/N422/444, respectively
746 // max = such that compression is 1:1
747 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
748 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
749 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
750
751 // fixed value
752 unsigned int rcModelSize = 8192;
753
754 // N422/N420 operate at 2 pixels per clock
755 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
756 Delay, pixels;
757
758 if (pixelFormat == dm_420)
759 pixelsPerClock = 2;
760 // #all other modes operate at 1 pixel per clock
761 else if (pixelFormat == dm_444)
762 pixelsPerClock = 1;
763 else if (pixelFormat == dm_n422)
764 pixelsPerClock = 2;
765 else
766 pixelsPerClock = 1;
767
768 //initial transmit delay as per PPS
769 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
770
771 //compute ssm delay
772 if (bpc == 8)
773 D = 81;
774 else if (bpc == 10)
775 D = 89;
776 else
777 D = 113;
778
779 //divide by pixel per cycle to compute slice width as seen by DSC
780 w = sliceWidth / pixelsPerClock;
781
782 //422 mode has an additional cycle of delay
783 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
784 s = 0;
785 else
786 s = 1;
787
788 //main calculation for the dscce
789 ix = initalXmitDelay + 45;
790 wx = (w + 2) / 3;
791 P = 3 * wx - w;
792 l0 = ix / w;
793 a = ix + P * l0;
794 ax = (a + 2) / 3 + D + 6 + 1;
795 L = (ax + wx - 1) / wx;
796 if ((ix % w) == 0 && P != 0)
797 lstall = 1;
798 else
799 lstall = 0;
800 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
801
802 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
803 pixels = Delay * 3 * pixelsPerClock;
804 return pixels;
805 }
806
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)807 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
808 {
809 unsigned int Delay = 0;
810
811 if (pixelFormat == dm_420) {
812 // sfr
813 Delay = Delay + 2;
814 // dsccif
815 Delay = Delay + 0;
816 // dscc - input deserializer
817 Delay = Delay + 3;
818 // dscc gets pixels every other cycle
819 Delay = Delay + 2;
820 // dscc - input cdc fifo
821 Delay = Delay + 12;
822 // dscc gets pixels every other cycle
823 Delay = Delay + 13;
824 // dscc - cdc uncertainty
825 Delay = Delay + 2;
826 // dscc - output cdc fifo
827 Delay = Delay + 7;
828 // dscc gets pixels every other cycle
829 Delay = Delay + 3;
830 // dscc - cdc uncertainty
831 Delay = Delay + 2;
832 // dscc - output serializer
833 Delay = Delay + 1;
834 // sft
835 Delay = Delay + 1;
836 } else if (pixelFormat == dm_n422) {
837 // sfr
838 Delay = Delay + 2;
839 // dsccif
840 Delay = Delay + 1;
841 // dscc - input deserializer
842 Delay = Delay + 5;
843 // dscc - input cdc fifo
844 Delay = Delay + 25;
845 // dscc - cdc uncertainty
846 Delay = Delay + 2;
847 // dscc - output cdc fifo
848 Delay = Delay + 10;
849 // dscc - cdc uncertainty
850 Delay = Delay + 2;
851 // dscc - output serializer
852 Delay = Delay + 1;
853 // sft
854 Delay = Delay + 1;
855 }
856 else {
857 // sfr
858 Delay = Delay + 2;
859 // dsccif
860 Delay = Delay + 0;
861 // dscc - input deserializer
862 Delay = Delay + 3;
863 // dscc - input cdc fifo
864 Delay = Delay + 12;
865 // dscc - cdc uncertainty
866 Delay = Delay + 2;
867 // dscc - output cdc fifo
868 Delay = Delay + 7;
869 // dscc - output serializer
870 Delay = Delay + 1;
871 // dscc - cdc uncertainty
872 Delay = Delay + 2;
873 // sft
874 Delay = Delay + 1;
875 }
876
877 return Delay;
878 }
879
/*
 * CalculatePrefetchSchedule - derive the prefetch schedule for one pipe.
 *
 * The function computes two candidate prefetch lengths, both in destination
 * lines:
 *   - dst_y_prefetch_equ: the lines left between VStartup and active once
 *     Tsetup, max(TWait + TCalc, Tdmdl) and the after-scaler delay have been
 *     subtracted, rounded to a quarter line;
 *   - dst_y_prefetch_oto: Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto, built
 *     from the prefetch source line counts and prefetch_bw_oto.
 * The smaller of the two is reported through @DestinationLinesForPrefetch
 * together with the matching bandwidth, the VM/row request line counts and
 * the prefetch vertical ratios.
 *
 * Outputs:
 *   - *Tdmdl, *NotEnoughTimeForDynamicMetadata and *Tdmdl_vm are written
 *     before the DPPCLK/DISPCLK check, i.e. on every path.
 *   - All remaining outputs listed below are only written when both clocks
 *     are non-zero: *DSTXAfterScaler, *DSTYAfterScaler, *Tno_bw,
 *     *prefetch_vmrow_bw, *PrefetchBandwidth, *DestinationLinesForPrefetch,
 *     *DestinationLinesToRequestVMInVBlank,
 *     *DestinationLinesToRequestRowInVBlank, *VRatioPrefetchY,
 *     *VRatioPrefetchC, *RequiredPrefetchPixDataBWLuma and
 *     *RequiredPrefetchPixDataBWChroma.
 *   - VUpdateOffsetPix, VUpdateWidthPix and VReadyOffsetPix are never
 *     written by this function; SwathWidthY and SwathWidthC are never read.
 *
 * Return: true on failure, false when a schedule was produced. Failure is
 * either the early exit for a zero DPPCLK/DISPCLK, or MyError being raised
 * further down; in the latter case the prefetch results (bandwidth, line
 * counts, ratios and required pixel-data bandwidths) are forced to 0 before
 * returning.
 */
static bool CalculatePrefetchSchedule(
		struct display_mode_lib *mode_lib,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		Pipe *myPipe,
		unsigned int DSCDelay,
		double DPPCLKDelaySubtotalPlusCNVCFormater,
		double DPPCLKDelaySCL,
		double DPPCLKDelaySCLLBOnly,
		double DPPCLKDelayCNVCCursor,
		double DISPCLKDelaySubtotal,
		unsigned int DPP_RECOUT_WIDTH,
		enum output_format_class OutputFormat,
		unsigned int MaxInterDCNTileRepeaters,
		unsigned int VStartup,
		unsigned int MaxVStartup,
		unsigned int GPUVMPageTableLevels,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		double HostVMMinPageSize,
		bool DynamicMetadataEnable,
		bool DynamicMetadataVMEnabled,
		int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int DynamicMetadataTransmittedBytes,
		double UrgentLatency,
		double UrgentExtraLatency,
		double TCalc,
		unsigned int PDEAndMetaPTEBytesFrame,
		unsigned int MetaRowByte,
		unsigned int PixelPTEBytesPerRow,
		double PrefetchSourceLinesY,
		unsigned int SwathWidthY,
		int BytePerPixelY,
		double VInitPreFillY,
		unsigned int MaxNumSwathY,
		double PrefetchSourceLinesC,
		unsigned int SwathWidthC,
		int BytePerPixelC,
		double VInitPreFillC,
		unsigned int MaxNumSwathC,
		long swath_width_luma_ub,
		long swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double TWait,
		bool ProgressiveToInterlaceUnitInOPP,
		double *DSTXAfterScaler,
		double *DSTYAfterScaler,
		double *DestinationLinesForPrefetch,
		double *PrefetchBandwidth,
		double *DestinationLinesToRequestVMInVBlank,
		double *DestinationLinesToRequestRowInVBlank,
		double *VRatioPrefetchY,
		double *VRatioPrefetchC,
		double *RequiredPrefetchPixDataBWLuma,
		double *RequiredPrefetchPixDataBWChroma,
		bool *NotEnoughTimeForDynamicMetadata,
		double *Tno_bw,
		double *prefetch_vmrow_bw,
		double *Tdmdl_vm,
		double *Tdmdl,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	bool MyError = false;
	unsigned int DPPCycles = 0, DISPCLKCycles = 0;
	double DSTTotalPixelsAfterScaler = 0;
	double LineTime = 0, Tsetup = 0;
	double dst_y_prefetch_equ = 0;
	double Tsw_oto = 0;
	double prefetch_bw_oto = 0;
	double Tvm_oto = 0;
	double Tr0_oto = 0;
	double Tvm_oto_lines = 0;
	double Tr0_oto_lines = 0;
	double dst_y_prefetch_oto = 0;
	double TimeForFetchingMetaPTE = 0;
	double TimeForFetchingRowInVBlank = 0;
	double LinesToRequestPrefetchPixelData = 0;
	double HostVMInefficiencyFactor = 0;
	unsigned int HostVMDynamicLevelsTrips = 0;
	double trip_to_mem = 0;
	double Tvm_trips = 0;
	double Tr0_trips = 0;
	double Tvm_trips_rounded = 0;
	double Tr0_trips_rounded = 0;
	double Lsw_oto = 0;
	double Tpre_rounded = 0;
	double prefetch_bw_equ = 0;
	double Tvm_equ = 0;
	double Tr0_equ = 0;
	double Tdmbf = 0;
	double Tdmec = 0;
	double Tdmsks = 0;

	/* Host VM only adds inefficiency/extra trips when GPUVM is on as well. */
	if (GPUVMEnable == true && HostVMEnable == true) {
		HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
	} else {
		HostVMInefficiencyFactor = 1;
		HostVMDynamicLevelsTrips = 0;
	}

	CalculateDynamicMetadataParameters(
			MaxInterDCNTileRepeaters,
			myPipe->DPPCLK,
			myPipe->DISPCLK,
			myPipe->DCFCLKDeepSleep,
			myPipe->PixelClock,
			myPipe->HTotal,
			myPipe->VBlank,
			DynamicMetadataTransmittedBytes,
			DynamicMetadataLinesBeforeActiveRequired,
			myPipe->InterlaceEnable,
			ProgressiveToInterlaceUnitInOPP,
			&Tsetup,
			&Tdmbf,
			&Tdmec,
			&Tdmsks);

	LineTime = myPipe->HTotal / myPipe->PixelClock;
	trip_to_mem = UrgentLatency;
	/*
	 * NOTE(review): GPUVMPageTableLevels is unsigned, so a value of 0 would
	 * make the "- 1" wrap around - presumably callers never pass 0 here;
	 * confirm against the call sites.
	 */
	Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);

	if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
	} else {
		*Tdmdl = TWait + UrgentExtraLatency;
	}

	if (DynamicMetadataEnable == true) {
		if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
			/*
			 * The dynamic metadata stages do not fit between VStartup
			 * and active: flag it and log the contributing times. The
			 * diagnostics belong to this branch, not to the branch where
			 * the time budget is met.
			 */
			*NotEnoughTimeForDynamicMetadata = true;
			dml_print("DML: Not Enough Time for Dynamic Meta!\n");
			dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
			dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
			dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
			dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
		} else {
			*NotEnoughTimeForDynamicMetadata = false;
		}
	} else {
		*NotEnoughTimeForDynamicMetadata = false;
	}

	*Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);

	if (myPipe->ScalerEnabled)
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
	else
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;

	DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;

	DISPCLKCycles = DISPCLKDelaySubtotal;

	/* Both clocks are used as divisors right below; bail out with an error. */
	if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
		return true;

	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
			+ DSCDelay;

	*DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;

	if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
		*DSTYAfterScaler = 1;
	else
		*DSTYAfterScaler = 0;

	/* Re-split the total after-scaler delay into whole lines plus a pixel remainder. */
	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));

	MyError = false;


	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
	/* Round both trip times up to a multiple of a quarter line. */
	Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
	Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;

	if (GPUVMEnable) {
		if (GPUVMPageTableLevels >= 3) {
			*Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
		} else
			*Tno_bw = 0;
	} else if (!myPipe->DCCEnable)
		*Tno_bw = LineTime;
	else
		*Tno_bw = LineTime / 4;

	dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
			- (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);

	Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
	Tsw_oto = Lsw_oto * LineTime;

	prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;

	if (GPUVMEnable == true) {
		Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
				Tvm_trips,
				LineTime / 4.0);
	} else
		Tvm_oto = LineTime / 4.0;

	if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
		Tr0_oto = dml_max3(
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
				LineTime - Tvm_oto, LineTime / 4);
	} else
		Tr0_oto = (LineTime - Tvm_oto) / 2.0;

	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;

	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
	Tpre_rounded = dst_y_prefetch_equ * LineTime;

	dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
	dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);

	dml_print("DML: LineTime: %f\n", LineTime);
	dml_print("DML: VStartup: %d\n", VStartup);
	dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
	dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
	dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
	dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
	dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
	dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
	dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
	dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
	dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
	dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
	dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);

	*PrefetchBandwidth = 0;
	*DestinationLinesToRequestVMInVBlank = 0;
	*DestinationLinesToRequestRowInVBlank = 0;
	*VRatioPrefetchY = 0;
	*VRatioPrefetchC = 0;
	*RequiredPrefetchPixDataBWLuma = 0;
	if (dst_y_prefetch_equ > 1) {
		/*
		 * Four candidate bandwidths, differing in which of the VM and
		 * row fetches are assumed to be bounded by the rounded trip
		 * times instead of by bandwidth. A value of 0 marks a candidate
		 * whose time budget is not positive.
		 */
		double PrefetchBandwidth1 = 0;
		double PrefetchBandwidth2 = 0;
		double PrefetchBandwidth3 = 0;
		double PrefetchBandwidth4 = 0;

		if (Tpre_rounded - *Tno_bw > 0)
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
					+ PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
					/ (Tpre_rounded - *Tno_bw);
		else
			PrefetchBandwidth1 = 0;

		if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
		}

		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
					HostVMInefficiencyFactor + PrefetchSourceLinesY *
					swath_width_luma_ub * BytePerPixelY +
					PrefetchSourceLinesC * swath_width_chroma_ub *
					BytePerPixelC) /
					(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth2 = 0;

		if (Tpre_rounded - Tvm_trips_rounded > 0)
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
					HostVMInefficiencyFactor + PrefetchSourceLinesY *
					swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
					swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
					Tvm_trips_rounded);
		else
			PrefetchBandwidth3 = 0;

		if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
		}

		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
					/ (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth4 = 0;

		{
			/*
			 * A candidate is accepted only if the VM/row fetch times it
			 * implies are consistent with the assumption it was built
			 * on; candidate 4 is the fallback.
			 */
			bool Case1OK;
			bool Case2OK;
			bool Case3OK;

			if (PrefetchBandwidth1 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
						>= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
					Case1OK = true;
				} else {
					Case1OK = false;
				}
			} else {
				Case1OK = false;
			}

			if (PrefetchBandwidth2 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
						>= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
					Case2OK = true;
				} else {
					Case2OK = false;
				}
			} else {
				Case2OK = false;
			}

			if (PrefetchBandwidth3 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
						< Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
					Case3OK = true;
				} else {
					Case3OK = false;
				}
			} else {
				Case3OK = false;
			}

			if (Case1OK) {
				prefetch_bw_equ = PrefetchBandwidth1;
			} else if (Case2OK) {
				prefetch_bw_equ = PrefetchBandwidth2;
			} else if (Case3OK) {
				prefetch_bw_equ = PrefetchBandwidth3;
			} else {
				prefetch_bw_equ = PrefetchBandwidth4;
			}

			dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);

			if (prefetch_bw_equ > 0) {
				if (GPUVMEnable) {
					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
				} else {
					Tvm_equ = LineTime / 4;
				}

				if ((GPUVMEnable || myPipe->DCCEnable)) {
					Tr0_equ = dml_max4(
							(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
							Tr0_trips,
							(LineTime - Tvm_equ) / 2,
							LineTime / 4);
				} else {
					Tr0_equ = (LineTime - Tvm_equ) / 2;
				}
			} else {
				Tvm_equ = 0;
				Tr0_equ = 0;
				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
			}
		}

		/* Use whichever candidate schedule needs fewer destination lines. */
		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
			*DestinationLinesForPrefetch = dst_y_prefetch_oto;
			TimeForFetchingMetaPTE = Tvm_oto;
			TimeForFetchingRowInVBlank = Tr0_oto;
			*PrefetchBandwidth = prefetch_bw_oto;
		} else {
			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			TimeForFetchingMetaPTE = Tvm_equ;
			TimeForFetchingRowInVBlank = Tr0_equ;
			*PrefetchBandwidth = prefetch_bw_equ;
		}

		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;

		*DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;


		/* Whatever is left after the VM fetch and the two row fetches. */
		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
				- 2 * *DestinationLinesToRequestRowInVBlank;

		if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {

			*VRatioPrefetchY = (double) PrefetchSourceLinesY
					/ LinesToRequestPrefetchPixelData;
			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
					*VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
							(double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
				} else {
					MyError = true;
					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
					*VRatioPrefetchY = 0;
				}
			}

			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);

			if ((SwathHeightC > 4)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC,
							(double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
				} else {
					MyError = true;
					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
					*VRatioPrefetchC = 0;
				}
			}

			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
		} else {
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
			dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
			*VRatioPrefetchY = 0;
			*VRatioPrefetchC = 0;
			*RequiredPrefetchPixDataBWLuma = 0;
			*RequiredPrefetchPixDataBWChroma = 0;
		}

		dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
		dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
		dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
		dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
		dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
		dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);

	} else {
		/* One line or less available for prefetch: no schedule possible. */
		MyError = true;
		dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
	}

	{
		double prefetch_vm_bw = 0;
		double prefetch_row_bw = 0;

		/*
		 * Bytes to fetch but no lines to fetch them in is an error;
		 * nothing to fetch simply means zero bandwidth.
		 */
		if (PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
		} else {
			prefetch_vm_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}
		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
		} else {
			prefetch_row_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}

		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On any error, hand back a fully zeroed prefetch result. */
	if (MyError) {
		*PrefetchBandwidth = 0;
		TimeForFetchingMetaPTE = 0;
		TimeForFetchingRowInVBlank = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		LinesToRequestPrefetchPixelData = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
}
1366
/*
 * Round @Clock up to a value of the form (4 * VCOSpeed) / N.
 *
 * N is the ratio (4 * VCOSpeed) / Clock passed through dml_floor() with a
 * granularity of 1 (presumably rounding it down to a whole number - see
 * dml_floor()). A smaller divider gives a larger quotient, hence "up".
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
1371
/*
 * Round @Clock down to a value of the form (4 * VCOSpeed) / N.
 *
 * N is the ratio (4 * VCOSpeed) / Clock passed through dml_ceil() with a
 * granularity of 1 (presumably rounding it up to a whole number - see
 * dml_ceil()). A larger divider gives a smaller quotient, hence "down".
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);
	const double vco_times_four = VCOSpeed * 4;

	return vco_times_four / divider;
}
1376
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,double DETBufferSize,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum scan_direction_class ScanOrientation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)1377 static void CalculateDCCConfiguration(
1378 bool DCCEnabled,
1379 bool DCCProgrammingAssumesScanDirectionUnknown,
1380 enum source_format_class SourcePixelFormat,
1381 unsigned int SurfaceWidthLuma,
1382 unsigned int SurfaceWidthChroma,
1383 unsigned int SurfaceHeightLuma,
1384 unsigned int SurfaceHeightChroma,
1385 double DETBufferSize,
1386 unsigned int RequestHeight256ByteLuma,
1387 unsigned int RequestHeight256ByteChroma,
1388 enum dm_swizzle_mode TilingFormat,
1389 unsigned int BytePerPixelY,
1390 unsigned int BytePerPixelC,
1391 double BytePerPixelDETY,
1392 double BytePerPixelDETC,
1393 enum scan_direction_class ScanOrientation,
1394 unsigned int *MaxUncompressedBlockLuma,
1395 unsigned int *MaxUncompressedBlockChroma,
1396 unsigned int *MaxCompressedBlockLuma,
1397 unsigned int *MaxCompressedBlockChroma,
1398 unsigned int *IndependentBlockLuma,
1399 unsigned int *IndependentBlockChroma)
1400 {
1401 int yuv420 = 0;
1402 int horz_div_l = 0;
1403 int horz_div_c = 0;
1404 int vert_div_l = 0;
1405 int vert_div_c = 0;
1406
1407 int req128_horz_wc_l = 0;
1408 int req128_horz_wc_c = 0;
1409 int req128_vert_wc_l = 0;
1410 int req128_vert_wc_c = 0;
1411 int segment_order_horz_contiguous_luma = 0;
1412 int segment_order_horz_contiguous_chroma = 0;
1413 int segment_order_vert_contiguous_luma = 0;
1414 int segment_order_vert_contiguous_chroma = 0;
1415
1416 long full_swath_bytes_horz_wc_l = 0;
1417 long full_swath_bytes_horz_wc_c = 0;
1418 long full_swath_bytes_vert_wc_l = 0;
1419 long full_swath_bytes_vert_wc_c = 0;
1420
1421 long swath_buf_size = 0;
1422 double detile_buf_vp_horz_limit = 0;
1423 double detile_buf_vp_vert_limit = 0;
1424
1425 long MAS_vp_horz_limit = 0;
1426 long MAS_vp_vert_limit = 0;
1427 long max_vp_horz_width = 0;
1428 long max_vp_vert_height = 0;
1429 long eff_surf_width_l = 0;
1430 long eff_surf_width_c = 0;
1431 long eff_surf_height_l = 0;
1432 long eff_surf_height_c = 0;
1433
1434 typedef enum {
1435 REQ_256Bytes,
1436 REQ_128BytesNonContiguous,
1437 REQ_128BytesContiguous,
1438 REQ_NA
1439 } RequestType;
1440
1441 RequestType RequestLuma;
1442 RequestType RequestChroma;
1443
1444 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1445 horz_div_l = 1;
1446 horz_div_c = 1;
1447 vert_div_l = 1;
1448 vert_div_c = 1;
1449
1450 if (BytePerPixelY == 1)
1451 vert_div_l = 0;
1452 if (BytePerPixelC == 1)
1453 vert_div_c = 0;
1454 if (BytePerPixelY == 8
1455 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1456 || TilingFormat == dm_sw_64kb_s_x))
1457 horz_div_l = 0;
1458 if (BytePerPixelC == 8
1459 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1460 || TilingFormat == dm_sw_64kb_s_x))
1461 horz_div_c = 0;
1462
1463 if (BytePerPixelC == 0) {
1464 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1465 detile_buf_vp_horz_limit = (double) swath_buf_size
1466 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1467 / (1 + horz_div_l));
1468 detile_buf_vp_vert_limit = (double) swath_buf_size
1469 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1470 } else {
1471 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1472 detile_buf_vp_horz_limit = (double) swath_buf_size
1473 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1474 / (1 + horz_div_l)
1475 + (double) RequestHeight256ByteChroma
1476 * BytePerPixelC / (1 + horz_div_c)
1477 / (1 + yuv420));
1478 detile_buf_vp_vert_limit = (double) swath_buf_size
1479 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
1480 + 256.0 / RequestHeight256ByteChroma
1481 / (1 + vert_div_c) / (1 + yuv420));
1482 }
1483
1484 if (SourcePixelFormat == dm_420_10) {
1485 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1486 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1487 }
1488
1489 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1490 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1491
1492 MAS_vp_horz_limit = 5760;
1493 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1494 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1495 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1496 eff_surf_width_l =
1497 (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1498 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1499 eff_surf_height_l = (
1500 SurfaceHeightLuma > max_vp_vert_height ?
1501 max_vp_vert_height : SurfaceHeightLuma);
1502 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1503
1504 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1505 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1506 if (BytePerPixelC > 0) {
1507 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
1508 * BytePerPixelC;
1509 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1510 } else {
1511 full_swath_bytes_horz_wc_c = 0;
1512 full_swath_bytes_vert_wc_c = 0;
1513 }
1514
1515 if (SourcePixelFormat == dm_420_10) {
1516 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1517 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1518 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1519 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1520 }
1521
1522 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1523 req128_horz_wc_l = 0;
1524 req128_horz_wc_c = 0;
1525 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
1526 && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
1527 <= DETBufferSize) {
1528 req128_horz_wc_l = 0;
1529 req128_horz_wc_c = 1;
1530 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
1531 && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
1532 <= DETBufferSize) {
1533 req128_horz_wc_l = 1;
1534 req128_horz_wc_c = 0;
1535 } else {
1536 req128_horz_wc_l = 1;
1537 req128_horz_wc_c = 1;
1538 }
1539
1540 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1541 req128_vert_wc_l = 0;
1542 req128_vert_wc_c = 0;
1543 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
1544 && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
1545 <= DETBufferSize) {
1546 req128_vert_wc_l = 0;
1547 req128_vert_wc_c = 1;
1548 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
1549 && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
1550 <= DETBufferSize) {
1551 req128_vert_wc_l = 1;
1552 req128_vert_wc_c = 0;
1553 } else {
1554 req128_vert_wc_l = 1;
1555 req128_vert_wc_c = 1;
1556 }
1557
1558 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1559 segment_order_horz_contiguous_luma = 0;
1560 } else {
1561 segment_order_horz_contiguous_luma = 1;
1562 }
1563 if ((BytePerPixelY == 8
1564 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1565 || TilingFormat == dm_sw_64kb_d_t
1566 || TilingFormat == dm_sw_64kb_r_x))
1567 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1568 segment_order_vert_contiguous_luma = 0;
1569 } else {
1570 segment_order_vert_contiguous_luma = 1;
1571 }
1572 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1573 segment_order_horz_contiguous_chroma = 0;
1574 } else {
1575 segment_order_horz_contiguous_chroma = 1;
1576 }
1577 if ((BytePerPixelC == 8
1578 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1579 || TilingFormat == dm_sw_64kb_d_t
1580 || TilingFormat == dm_sw_64kb_r_x))
1581 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1582 segment_order_vert_contiguous_chroma = 0;
1583 } else {
1584 segment_order_vert_contiguous_chroma = 1;
1585 }
1586
1587 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1588 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1589 RequestLuma = REQ_256Bytes;
1590 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
1591 || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1592 RequestLuma = REQ_128BytesNonContiguous;
1593 } else {
1594 RequestLuma = REQ_128BytesContiguous;
1595 }
1596 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1597 RequestChroma = REQ_256Bytes;
1598 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
1599 || (req128_vert_wc_c == 1
1600 && segment_order_vert_contiguous_chroma == 0)) {
1601 RequestChroma = REQ_128BytesNonContiguous;
1602 } else {
1603 RequestChroma = REQ_128BytesContiguous;
1604 }
1605 } else if (ScanOrientation != dm_vert) {
1606 if (req128_horz_wc_l == 0) {
1607 RequestLuma = REQ_256Bytes;
1608 } else if (segment_order_horz_contiguous_luma == 0) {
1609 RequestLuma = REQ_128BytesNonContiguous;
1610 } else {
1611 RequestLuma = REQ_128BytesContiguous;
1612 }
1613 if (req128_horz_wc_c == 0) {
1614 RequestChroma = REQ_256Bytes;
1615 } else if (segment_order_horz_contiguous_chroma == 0) {
1616 RequestChroma = REQ_128BytesNonContiguous;
1617 } else {
1618 RequestChroma = REQ_128BytesContiguous;
1619 }
1620 } else {
1621 if (req128_vert_wc_l == 0) {
1622 RequestLuma = REQ_256Bytes;
1623 } else if (segment_order_vert_contiguous_luma == 0) {
1624 RequestLuma = REQ_128BytesNonContiguous;
1625 } else {
1626 RequestLuma = REQ_128BytesContiguous;
1627 }
1628 if (req128_vert_wc_c == 0) {
1629 RequestChroma = REQ_256Bytes;
1630 } else if (segment_order_vert_contiguous_chroma == 0) {
1631 RequestChroma = REQ_128BytesNonContiguous;
1632 } else {
1633 RequestChroma = REQ_128BytesContiguous;
1634 }
1635 }
1636
1637 if (RequestLuma == REQ_256Bytes) {
1638 *MaxUncompressedBlockLuma = 256;
1639 *MaxCompressedBlockLuma = 256;
1640 *IndependentBlockLuma = 0;
1641 } else if (RequestLuma == REQ_128BytesContiguous) {
1642 *MaxUncompressedBlockLuma = 256;
1643 *MaxCompressedBlockLuma = 128;
1644 *IndependentBlockLuma = 128;
1645 } else {
1646 *MaxUncompressedBlockLuma = 256;
1647 *MaxCompressedBlockLuma = 64;
1648 *IndependentBlockLuma = 64;
1649 }
1650
1651 if (RequestChroma == REQ_256Bytes) {
1652 *MaxUncompressedBlockChroma = 256;
1653 *MaxCompressedBlockChroma = 256;
1654 *IndependentBlockChroma = 0;
1655 } else if (RequestChroma == REQ_128BytesContiguous) {
1656 *MaxUncompressedBlockChroma = 256;
1657 *MaxCompressedBlockChroma = 128;
1658 *IndependentBlockChroma = 128;
1659 } else {
1660 *MaxUncompressedBlockChroma = 256;
1661 *MaxCompressedBlockChroma = 64;
1662 *IndependentBlockChroma = 64;
1663 }
1664
1665 if (DCCEnabled != true || BytePerPixelC == 0) {
1666 *MaxUncompressedBlockChroma = 0;
1667 *MaxCompressedBlockChroma = 0;
1668 *IndependentBlockChroma = 0;
1669 }
1670
1671 if (DCCEnabled != true) {
1672 *MaxUncompressedBlockLuma = 0;
1673 *MaxCompressedBlockLuma = 0;
1674 *IndependentBlockLuma = 0;
1675 }
1676 }
1677
1678
CalculatePrefetchSourceLines(struct display_mode_lib * mode_lib,double VRatio,double vtaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)1679 static double CalculatePrefetchSourceLines(
1680 struct display_mode_lib *mode_lib,
1681 double VRatio,
1682 double vtaps,
1683 bool Interlace,
1684 bool ProgressiveToInterlaceUnitInOPP,
1685 unsigned int SwathHeight,
1686 unsigned int ViewportYStart,
1687 double *VInitPreFill,
1688 unsigned int *MaxNumSwath)
1689 {
1690 unsigned int MaxPartialSwath = 0;
1691
1692 if (ProgressiveToInterlaceUnitInOPP)
1693 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1694 else
1695 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1696
1697 if (!mode_lib->vba.IgnoreViewportPositioning) {
1698
1699 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1700
1701 if (*VInitPreFill > 1.0)
1702 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1703 else
1704 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1705 % SwathHeight;
1706 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1707
1708 } else {
1709
1710 if (ViewportYStart != 0)
1711 dml_print(
1712 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1713
1714 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1715
1716 if (*VInitPreFill > 1.0)
1717 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1718 else
1719 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1720 % SwathHeight;
1721 }
1722
1723 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1724 }
1725
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int SwathWidth,unsigned int ViewportHeight,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMinPageSize,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1726 static unsigned int CalculateVMAndRowBytes(
1727 struct display_mode_lib *mode_lib,
1728 bool DCCEnable,
1729 unsigned int BlockHeight256Bytes,
1730 unsigned int BlockWidth256Bytes,
1731 enum source_format_class SourcePixelFormat,
1732 unsigned int SurfaceTiling,
1733 unsigned int BytePerPixel,
1734 enum scan_direction_class ScanDirection,
1735 unsigned int SwathWidth,
1736 unsigned int ViewportHeight,
1737 bool GPUVMEnable,
1738 bool HostVMEnable,
1739 unsigned int HostVMMaxNonCachedPageTableLevels,
1740 unsigned int GPUVMMinPageSize,
1741 unsigned int HostVMMinPageSize,
1742 unsigned int PTEBufferSizeInRequests,
1743 unsigned int Pitch,
1744 unsigned int DCCMetaPitch,
1745 unsigned int *MacroTileWidth,
1746 unsigned int *MetaRowByte,
1747 unsigned int *PixelPTEBytesPerRow,
1748 bool *PTEBufferSizeNotExceeded,
1749 unsigned int *dpte_row_width_ub,
1750 unsigned int *dpte_row_height,
1751 unsigned int *MetaRequestWidth,
1752 unsigned int *MetaRequestHeight,
1753 unsigned int *meta_row_width,
1754 unsigned int *meta_row_height,
1755 unsigned int *vm_group_bytes,
1756 unsigned int *dpte_group_bytes,
1757 unsigned int *PixelPTEReqWidth,
1758 unsigned int *PixelPTEReqHeight,
1759 unsigned int *PTERequestSize,
1760 unsigned int *DPDE0BytesFrame,
1761 unsigned int *MetaPTEBytesFrame)
1762 {
1763 unsigned int MPDEBytesFrame = 0;
1764 unsigned int DCCMetaSurfaceBytes = 0;
1765 unsigned int MacroTileSizeBytes = 0;
1766 unsigned int MacroTileHeight = 0;
1767 unsigned int ExtraDPDEBytesFrame = 0;
1768 unsigned int PDEAndMetaPTEBytesFrame = 0;
1769 unsigned int PixelPTEReqHeightPTEs = 0;
1770 unsigned int HostVMDynamicLevels = 0;
1771
1772 double FractionOfPTEReturnDrop;
1773
1774 if (GPUVMEnable == true && HostVMEnable == true) {
1775 if (HostVMMinPageSize < 2048) {
1776 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1777 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1778 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1779 } else {
1780 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1781 }
1782 }
1783
1784 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1785 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1786 if (ScanDirection != dm_vert) {
1787 *meta_row_height = *MetaRequestHeight;
1788 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1789 + *MetaRequestWidth;
1790 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1791 } else {
1792 *meta_row_height = *MetaRequestWidth;
1793 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1794 + *MetaRequestHeight;
1795 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1796 }
1797 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1798 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1799 if (GPUVMEnable == true) {
1800 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1801 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1802 } else {
1803 *MetaPTEBytesFrame = 0;
1804 MPDEBytesFrame = 0;
1805 }
1806
1807 if (DCCEnable != true) {
1808 *MetaPTEBytesFrame = 0;
1809 MPDEBytesFrame = 0;
1810 *MetaRowByte = 0;
1811 }
1812
1813 if (SurfaceTiling == dm_sw_linear) {
1814 MacroTileSizeBytes = 256;
1815 MacroTileHeight = BlockHeight256Bytes;
1816 } else {
1817 MacroTileSizeBytes = 65536;
1818 MacroTileHeight = 16 * BlockHeight256Bytes;
1819 }
1820 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1821
1822 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1823 if (ScanDirection != dm_vert) {
1824 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1825 } else {
1826 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1827 }
1828 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1829 } else {
1830 *DPDE0BytesFrame = 0;
1831 ExtraDPDEBytesFrame = 0;
1832 }
1833
1834 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1835 + ExtraDPDEBytesFrame;
1836
1837 if (HostVMEnable == true) {
1838 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1839 }
1840
1841 if (SurfaceTiling == dm_sw_linear) {
1842 PixelPTEReqHeightPTEs = 1;
1843 *PixelPTEReqHeight = 1;
1844 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1845 *PTERequestSize = 64;
1846 FractionOfPTEReturnDrop = 0;
1847 } else if (MacroTileSizeBytes == 4096) {
1848 PixelPTEReqHeightPTEs = 1;
1849 *PixelPTEReqHeight = MacroTileHeight;
1850 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1851 *PTERequestSize = 64;
1852 if (ScanDirection != dm_vert)
1853 FractionOfPTEReturnDrop = 0;
1854 else
1855 FractionOfPTEReturnDrop = 7 / 8;
1856 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1857 PixelPTEReqHeightPTEs = 16;
1858 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1859 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1860 *PTERequestSize = 128;
1861 FractionOfPTEReturnDrop = 0;
1862 } else {
1863 PixelPTEReqHeightPTEs = 1;
1864 *PixelPTEReqHeight = MacroTileHeight;
1865 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1866 *PTERequestSize = 64;
1867 FractionOfPTEReturnDrop = 0;
1868 }
1869
1870 if (SurfaceTiling == dm_sw_linear) {
1871 if (PTEBufferSizeInRequests == 0)
1872 *dpte_row_height = 1;
1873 else
1874 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1875 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1876 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1877 } else if (ScanDirection != dm_vert) {
1878 *dpte_row_height = *PixelPTEReqHeight;
1879 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1880 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1881 } else {
1882 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1883 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1884 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1885 }
1886 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1887 <= 64 * PTEBufferSizeInRequests) {
1888 *PTEBufferSizeNotExceeded = true;
1889 } else {
1890 *PTEBufferSizeNotExceeded = false;
1891 }
1892
1893 if (GPUVMEnable != true) {
1894 *PixelPTEBytesPerRow = 0;
1895 *PTEBufferSizeNotExceeded = true;
1896 }
1897 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1898
1899 if (HostVMEnable == true) {
1900 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1901 }
1902
1903 if (HostVMEnable == true) {
1904 *vm_group_bytes = 512;
1905 *dpte_group_bytes = 512;
1906 } else if (GPUVMEnable == true) {
1907 *vm_group_bytes = 2048;
1908 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1909 *dpte_group_bytes = 512;
1910 } else {
1911 *dpte_group_bytes = 2048;
1912 }
1913 } else {
1914 *vm_group_bytes = 0;
1915 *dpte_group_bytes = 0;
1916 }
1917
1918 return PDEAndMetaPTEBytesFrame;
1919 }
1920
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1921 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1922 struct display_mode_lib *mode_lib)
1923 {
1924 struct vba_vars_st *v = &mode_lib->vba;
1925 unsigned int j, k;
1926 long ReorderBytes = 0;
1927 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1928 double MaxTotalRDBandwidth = 0;
1929 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1930 bool DestinationLineTimesForPrefetchLessThan2 = false;
1931 bool VRatioPrefetchMoreThan4 = false;
1932 double TWait;
1933
1934 v->WritebackDISPCLK = 0.0;
1935 v->DISPCLKWithRamping = 0;
1936 v->DISPCLKWithoutRamping = 0;
1937 v->GlobalDPPCLK = 0.0;
1938 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
1939 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1940 v->ReturnBusWidth * v->DCFCLK,
1941 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1942 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1943 if (v->HostVMEnable != true) {
1944 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1945 } else {
1946 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1947 }
1948 /* End DAL custom code */
1949
1950 // DISPCLK and DPPCLK Calculation
1951 //
1952 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1953 if (v->WritebackEnable[k]) {
1954 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1955 dml30_CalculateWriteBackDISPCLK(
1956 v->WritebackPixelFormat[k],
1957 v->PixelClock[k],
1958 v->WritebackHRatio[k],
1959 v->WritebackVRatio[k],
1960 v->WritebackHTaps[k],
1961 v->WritebackVTaps[k],
1962 v->WritebackSourceWidth[k],
1963 v->WritebackDestinationWidth[k],
1964 v->HTotal[k],
1965 v->WritebackLineBufferSize));
1966 }
1967 }
1968
1969 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1970 if (v->HRatio[k] > 1) {
1971 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1972 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1973 } else {
1974 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1975 v->MaxDCHUBToPSCLThroughput,
1976 v->MaxPSCLToLBThroughput);
1977 }
1978
1979 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1980 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1981 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1982
1983 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1984 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1985 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1986 }
1987
1988 if ((v->SourcePixelFormat[k] != dm_420_8
1989 && v->SourcePixelFormat[k] != dm_420_10
1990 && v->SourcePixelFormat[k] != dm_420_12
1991 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1992 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1993 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1994 } else {
1995 if (v->HRatioChroma[k] > 1) {
1996 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1997 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1998 } else {
1999 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2000 v->MaxDCHUBToPSCLThroughput,
2001 v->MaxPSCLToLBThroughput);
2002 }
2003 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2004 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2005 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
2006
2007 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
2008 && v->DPPCLKUsingSingleDPPChroma
2009 < 2 * v->PixelClock[k]) {
2010 v->DPPCLKUsingSingleDPPChroma = 2
2011 * v->PixelClock[k];
2012 }
2013
2014 v->DPPCLKUsingSingleDPP[k] = dml_max(
2015 v->DPPCLKUsingSingleDPPLuma,
2016 v->DPPCLKUsingSingleDPPChroma);
2017 }
2018 }
2019
2020 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2021 if (v->BlendingAndTiming[k] != k)
2022 continue;
2023 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2024 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2025 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2026 * (1 + v->DISPCLKRampingMargin / 100));
2027 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2028 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2029 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2030 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2031 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2032 * (1 + v->DISPCLKRampingMargin / 100));
2033 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2034 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2035 } else {
2036 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2037 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2038 * (1 + v->DISPCLKRampingMargin / 100));
2039 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2040 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2041 }
2042 }
2043
2044 v->DISPCLKWithRamping = dml_max(
2045 v->DISPCLKWithRamping,
2046 v->WritebackDISPCLK);
2047 v->DISPCLKWithoutRamping = dml_max(
2048 v->DISPCLKWithoutRamping,
2049 v->WritebackDISPCLK);
2050
2051 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2052 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2053 v->DISPCLKWithRamping,
2054 v->DISPCLKDPPCLKVCOSpeed);
2055 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2056 v->DISPCLKWithoutRamping,
2057 v->DISPCLKDPPCLKVCOSpeed);
2058 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2059 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
2060 v->DISPCLKDPPCLKVCOSpeed);
2061 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
2062 > v->MaxDispclkRoundedToDFSGranularity) {
2063 v->DISPCLK_calculated =
2064 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2065 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
2066 > v->MaxDispclkRoundedToDFSGranularity) {
2067 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2068 } else {
2069 v->DISPCLK_calculated =
2070 v->DISPCLKWithRampingRoundedToDFSGranularity;
2071 }
2072 v->DISPCLK = v->DISPCLK_calculated;
2073 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2074
2075 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2076 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
2077 / v->DPPPerPlane[k]
2078 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2079 v->GlobalDPPCLK = dml_max(
2080 v->GlobalDPPCLK,
2081 v->DPPCLK_calculated[k]);
2082 }
2083 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2084 v->GlobalDPPCLK,
2085 v->DISPCLKDPPCLKVCOSpeed);
2086 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2087 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2088 * dml_ceil(
2089 v->DPPCLK_calculated[k] * 255.0
2090 / v->GlobalDPPCLK,
2091 1);
2092 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2093 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2094 }
2095
2096 // Urgent and B P-State/DRAM Clock Change Watermark
2097 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2098 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2099
2100 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2101 CalculateBytePerPixelAnd256BBlockSizes(
2102 v->SourcePixelFormat[k],
2103 v->SurfaceTiling[k],
2104 &v->BytePerPixelY[k],
2105 &v->BytePerPixelC[k],
2106 &v->BytePerPixelDETY[k],
2107 &v->BytePerPixelDETC[k],
2108 &v->BlockHeight256BytesY[k],
2109 &v->BlockHeight256BytesC[k],
2110 &v->BlockWidth256BytesY[k],
2111 &v->BlockWidth256BytesC[k]);
2112 }
2113
2114 CalculateSwathWidth(
2115 false,
2116 v->NumberOfActivePlanes,
2117 v->SourcePixelFormat,
2118 v->SourceScan,
2119 v->ViewportWidth,
2120 v->ViewportHeight,
2121 v->SurfaceWidthY,
2122 v->SurfaceWidthC,
2123 v->SurfaceHeightY,
2124 v->SurfaceHeightC,
2125 v->ODMCombineEnabled,
2126 v->BytePerPixelY,
2127 v->BytePerPixelC,
2128 v->BlockHeight256BytesY,
2129 v->BlockHeight256BytesC,
2130 v->BlockWidth256BytesY,
2131 v->BlockWidth256BytesC,
2132 v->BlendingAndTiming,
2133 v->HActive,
2134 v->HRatio,
2135 v->DPPPerPlane,
2136 v->SwathWidthSingleDPPY,
2137 v->SwathWidthSingleDPPC,
2138 v->SwathWidthY,
2139 v->SwathWidthC,
2140 v->dummyinteger3,
2141 v->dummyinteger4,
2142 v->swath_width_luma_ub,
2143 v->swath_width_chroma_ub);
2144
2145
2146 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2147 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2148 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2149 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2150 }
2151
2152
2153 // DCFCLK Deep Sleep
2154 CalculateDCFCLKDeepSleep(
2155 mode_lib,
2156 v->NumberOfActivePlanes,
2157 v->BytePerPixelY,
2158 v->BytePerPixelC,
2159 v->VRatio,
2160 v->VRatioChroma,
2161 v->SwathWidthY,
2162 v->SwathWidthC,
2163 v->DPPPerPlane,
2164 v->HRatio,
2165 v->HRatioChroma,
2166 v->PixelClock,
2167 v->PSCL_THROUGHPUT_LUMA,
2168 v->PSCL_THROUGHPUT_CHROMA,
2169 v->DPPCLK,
2170 v->ReadBandwidthPlaneLuma,
2171 v->ReadBandwidthPlaneChroma,
2172 v->ReturnBusWidth,
2173 &v->DCFCLKDeepSleep);
2174
2175 // DSCCLK
2176 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2177 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2178 v->DSCCLK_calculated[k] = 0.0;
2179 } else {
2180 if (v->OutputFormat[k] == dm_420)
2181 v->DSCFormatFactor = 2;
2182 else if (v->OutputFormat[k] == dm_444)
2183 v->DSCFormatFactor = 1;
2184 else if (v->OutputFormat[k] == dm_n422)
2185 v->DSCFormatFactor = 2;
2186 else
2187 v->DSCFormatFactor = 1;
2188 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2189 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2190 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2191 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2192 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2193 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2194 else
2195 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2196 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2197 }
2198 }
2199
2200 // DSC Delay
2201 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2202 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2203
2204 if (v->DSCEnabled[k] && BPP != 0) {
2205 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2206 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2207 BPP,
2208 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2209 v->NumberOfDSCSlices[k],
2210 v->OutputFormat[k],
2211 v->Output[k])
2212 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2213 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2214 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2215 BPP,
2216 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2217 v->NumberOfDSCSlices[k] / 2.0,
2218 v->OutputFormat[k],
2219 v->Output[k])
2220 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2221 } else {
2222 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2223 BPP,
2224 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2225 v->NumberOfDSCSlices[k] / 4.0,
2226 v->OutputFormat[k],
2227 v->Output[k])
2228 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2229 }
2230 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2231 } else {
2232 v->DSCDelay[k] = 0;
2233 }
2234 }
2235
2236 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2237 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2238 if (j != k && v->BlendingAndTiming[k] == j
2239 && v->DSCEnabled[j])
2240 v->DSCDelay[k] = v->DSCDelay[j];
2241
2242 // Prefetch
2243 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2244 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2245 unsigned int PixelPTEBytesPerRowY = 0;
2246 unsigned int MetaRowByteY = 0;
2247 unsigned int MetaRowByteC = 0;
2248 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2249 unsigned int PixelPTEBytesPerRowC = 0;
2250 bool PTEBufferSizeNotExceededY = 0;
2251 bool PTEBufferSizeNotExceededC = 0;
2252
2253
2254 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2255 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2256 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2257 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2258 } else {
2259 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2260 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2261
2262 }
2263 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2264 mode_lib,
2265 v->DCCEnable[k],
2266 v->BlockHeight256BytesC[k],
2267 v->BlockWidth256BytesC[k],
2268 v->SourcePixelFormat[k],
2269 v->SurfaceTiling[k],
2270 v->BytePerPixelC[k],
2271 v->SourceScan[k],
2272 v->SwathWidthC[k],
2273 v->ViewportHeightChroma[k],
2274 v->GPUVMEnable,
2275 v->HostVMEnable,
2276 v->HostVMMaxNonCachedPageTableLevels,
2277 v->GPUVMMinPageSize,
2278 v->HostVMMinPageSize,
2279 v->PTEBufferSizeInRequestsForChroma,
2280 v->PitchC[k],
2281 v->DCCMetaPitchC[k],
2282 &v->MacroTileWidthC[k],
2283 &MetaRowByteC,
2284 &PixelPTEBytesPerRowC,
2285 &PTEBufferSizeNotExceededC,
2286 &v->dpte_row_width_chroma_ub[k],
2287 &v->dpte_row_height_chroma[k],
2288 &v->meta_req_width_chroma[k],
2289 &v->meta_req_height_chroma[k],
2290 &v->meta_row_width_chroma[k],
2291 &v->meta_row_height_chroma[k],
2292 &v->dummyinteger1,
2293 &v->dummyinteger2,
2294 &v->PixelPTEReqWidthC[k],
2295 &v->PixelPTEReqHeightC[k],
2296 &v->PTERequestSizeC[k],
2297 &v->dpde0_bytes_per_frame_ub_c[k],
2298 &v->meta_pte_bytes_per_frame_ub_c[k]);
2299
2300 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2301 mode_lib,
2302 v->VRatioChroma[k],
2303 v->VTAPsChroma[k],
2304 v->Interlace[k],
2305 v->ProgressiveToInterlaceUnitInOPP,
2306 v->SwathHeightC[k],
2307 v->ViewportYStartC[k],
2308 &v->VInitPreFillC[k],
2309 &v->MaxNumSwathC[k]);
2310 } else {
2311 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2312 v->PTEBufferSizeInRequestsForChroma = 0;
2313 PixelPTEBytesPerRowC = 0;
2314 PDEAndMetaPTEBytesFrameC = 0;
2315 MetaRowByteC = 0;
2316 v->MaxNumSwathC[k] = 0;
2317 v->PrefetchSourceLinesC[k] = 0;
2318 }
2319
2320 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2321 mode_lib,
2322 v->DCCEnable[k],
2323 v->BlockHeight256BytesY[k],
2324 v->BlockWidth256BytesY[k],
2325 v->SourcePixelFormat[k],
2326 v->SurfaceTiling[k],
2327 v->BytePerPixelY[k],
2328 v->SourceScan[k],
2329 v->SwathWidthY[k],
2330 v->ViewportHeight[k],
2331 v->GPUVMEnable,
2332 v->HostVMEnable,
2333 v->HostVMMaxNonCachedPageTableLevels,
2334 v->GPUVMMinPageSize,
2335 v->HostVMMinPageSize,
2336 v->PTEBufferSizeInRequestsForLuma,
2337 v->PitchY[k],
2338 v->DCCMetaPitchY[k],
2339 &v->MacroTileWidthY[k],
2340 &MetaRowByteY,
2341 &PixelPTEBytesPerRowY,
2342 &PTEBufferSizeNotExceededY,
2343 &v->dpte_row_width_luma_ub[k],
2344 &v->dpte_row_height[k],
2345 &v->meta_req_width[k],
2346 &v->meta_req_height[k],
2347 &v->meta_row_width[k],
2348 &v->meta_row_height[k],
2349 &v->vm_group_bytes[k],
2350 &v->dpte_group_bytes[k],
2351 &v->PixelPTEReqWidthY[k],
2352 &v->PixelPTEReqHeightY[k],
2353 &v->PTERequestSizeY[k],
2354 &v->dpde0_bytes_per_frame_ub_l[k],
2355 &v->meta_pte_bytes_per_frame_ub_l[k]);
2356
2357 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2358 mode_lib,
2359 v->VRatio[k],
2360 v->vtaps[k],
2361 v->Interlace[k],
2362 v->ProgressiveToInterlaceUnitInOPP,
2363 v->SwathHeightY[k],
2364 v->ViewportYStartY[k],
2365 &v->VInitPreFillY[k],
2366 &v->MaxNumSwathY[k]);
2367 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2368 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2369 + PDEAndMetaPTEBytesFrameC;
2370 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2371
2372 CalculateRowBandwidth(
2373 v->GPUVMEnable,
2374 v->SourcePixelFormat[k],
2375 v->VRatio[k],
2376 v->VRatioChroma[k],
2377 v->DCCEnable[k],
2378 v->HTotal[k] / v->PixelClock[k],
2379 MetaRowByteY,
2380 MetaRowByteC,
2381 v->meta_row_height[k],
2382 v->meta_row_height_chroma[k],
2383 PixelPTEBytesPerRowY,
2384 PixelPTEBytesPerRowC,
2385 v->dpte_row_height[k],
2386 v->dpte_row_height_chroma[k],
2387 &v->meta_row_bw[k],
2388 &v->dpte_row_bw[k]);
2389 }
2390
2391 v->TotalDCCActiveDPP = 0;
2392 v->TotalActiveDPP = 0;
2393 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2394 v->TotalActiveDPP = v->TotalActiveDPP
2395 + v->DPPPerPlane[k];
2396 if (v->DCCEnable[k])
2397 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2398 + v->DPPPerPlane[k];
2399 }
2400
2401
2402 ReorderBytes = v->NumberOfChannels * dml_max3(
2403 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2404 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2405 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2406
2407 v->UrgentExtraLatency = CalculateExtraLatency(
2408 v->RoundTripPingLatencyCycles,
2409 ReorderBytes,
2410 v->DCFCLK,
2411 v->TotalActiveDPP,
2412 v->PixelChunkSizeInKByte,
2413 v->TotalDCCActiveDPP,
2414 v->MetaChunkSize,
2415 v->ReturnBW,
2416 v->GPUVMEnable,
2417 v->HostVMEnable,
2418 v->NumberOfActivePlanes,
2419 v->DPPPerPlane,
2420 v->dpte_group_bytes,
2421 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2422 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2423 v->HostVMMinPageSize,
2424 v->HostVMMaxNonCachedPageTableLevels);
2425
2426 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2427
2428 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2429 if (v->BlendingAndTiming[k] == k) {
2430 if (v->WritebackEnable[k] == true) {
2431 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2432 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2433 v->WritebackHRatio[k],
2434 v->WritebackVRatio[k],
2435 v->WritebackVTaps[k],
2436 v->WritebackDestinationWidth[k],
2437 v->WritebackDestinationHeight[k],
2438 v->WritebackSourceHeight[k],
2439 v->HTotal[k]) / v->DISPCLK;
2440 } else
2441 v->WritebackDelay[v->VoltageLevel][k] = 0;
2442 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2443 if (v->BlendingAndTiming[j] == k
2444 && v->WritebackEnable[j] == true) {
2445 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2446 v->WritebackLatency + CalculateWriteBackDelay(
2447 v->WritebackPixelFormat[j],
2448 v->WritebackHRatio[j],
2449 v->WritebackVRatio[j],
2450 v->WritebackVTaps[j],
2451 v->WritebackDestinationWidth[j],
2452 v->WritebackDestinationHeight[j],
2453 v->WritebackSourceHeight[j],
2454 v->HTotal[k]) / v->DISPCLK);
2455 }
2456 }
2457 }
2458 }
2459
2460 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2461 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2462 if (v->BlendingAndTiming[k] == j)
2463 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2464
2465 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2466 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2467 }
2468
2469 v->MaximumMaxVStartupLines = 0;
2470 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2471 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2472
2473 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2474 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2475 } else {
2476 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2477 }
2478 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2479
2480
2481 v->FractionOfUrgentBandwidth = 0.0;
2482 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2483
2484 v->VStartupLines = 13;
2485
2486 do {
2487 MaxTotalRDBandwidth = 0;
2488 MaxTotalRDBandwidthNoUrgentBurst = 0;
2489 DestinationLineTimesForPrefetchLessThan2 = false;
2490 VRatioPrefetchMoreThan4 = false;
2491 TWait = CalculateTWait(
2492 PrefetchMode,
2493 v->FinalDRAMClockChangeLatency,
2494 v->UrgentLatency,
2495 v->SREnterPlusExitTime);
2496
2497 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2498 Pipe myPipe = { 0 };
2499
2500 myPipe.DPPCLK = v->DPPCLK[k];
2501 myPipe.DISPCLK = v->DISPCLK;
2502 myPipe.PixelClock = v->PixelClock[k];
2503 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2504 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2505 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2506 myPipe.SourceScan = v->SourceScan[k];
2507 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2508 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2509 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2510 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2511 myPipe.InterlaceEnable = v->Interlace[k];
2512 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2513 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2514 myPipe.HTotal = v->HTotal[k];
2515 myPipe.DCCEnable = v->DCCEnable[k];
2516 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2517
2518 v->ErrorResult[k] = CalculatePrefetchSchedule(
2519 mode_lib,
2520 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2521 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2522 &myPipe,
2523 v->DSCDelay[k],
2524 v->DPPCLKDelaySubtotal
2525 + v->DPPCLKDelayCNVCFormater,
2526 v->DPPCLKDelaySCL,
2527 v->DPPCLKDelaySCLLBOnly,
2528 v->DPPCLKDelayCNVCCursor,
2529 v->DISPCLKDelaySubtotal,
2530 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2531 v->OutputFormat[k],
2532 v->MaxInterDCNTileRepeaters,
2533 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2534 v->MaxVStartupLines[k],
2535 v->GPUVMMaxPageTableLevels,
2536 v->GPUVMEnable,
2537 v->HostVMEnable,
2538 v->HostVMMaxNonCachedPageTableLevels,
2539 v->HostVMMinPageSize,
2540 v->DynamicMetadataEnable[k],
2541 v->DynamicMetadataVMEnabled,
2542 v->DynamicMetadataLinesBeforeActiveRequired[k],
2543 v->DynamicMetadataTransmittedBytes[k],
2544 v->UrgentLatency,
2545 v->UrgentExtraLatency,
2546 v->TCalc,
2547 v->PDEAndMetaPTEBytesFrame[k],
2548 v->MetaRowByte[k],
2549 v->PixelPTEBytesPerRow[k],
2550 v->PrefetchSourceLinesY[k],
2551 v->SwathWidthY[k],
2552 v->BytePerPixelY[k],
2553 v->VInitPreFillY[k],
2554 v->MaxNumSwathY[k],
2555 v->PrefetchSourceLinesC[k],
2556 v->SwathWidthC[k],
2557 v->BytePerPixelC[k],
2558 v->VInitPreFillC[k],
2559 v->MaxNumSwathC[k],
2560 v->swath_width_luma_ub[k],
2561 v->swath_width_chroma_ub[k],
2562 v->SwathHeightY[k],
2563 v->SwathHeightC[k],
2564 TWait,
2565 v->ProgressiveToInterlaceUnitInOPP,
2566 &v->DSTXAfterScaler[k],
2567 &v->DSTYAfterScaler[k],
2568 &v->DestinationLinesForPrefetch[k],
2569 &v->PrefetchBandwidth[k],
2570 &v->DestinationLinesToRequestVMInVBlank[k],
2571 &v->DestinationLinesToRequestRowInVBlank[k],
2572 &v->VRatioPrefetchY[k],
2573 &v->VRatioPrefetchC[k],
2574 &v->RequiredPrefetchPixDataBWLuma[k],
2575 &v->RequiredPrefetchPixDataBWChroma[k],
2576 &v->NotEnoughTimeForDynamicMetadata[k],
2577 &v->Tno_bw[k],
2578 &v->prefetch_vmrow_bw[k],
2579 &v->Tdmdl_vm[k],
2580 &v->Tdmdl[k],
2581 &v->VUpdateOffsetPix[k],
2582 &v->VUpdateWidthPix[k],
2583 &v->VReadyOffsetPix[k]);
2584 if (v->BlendingAndTiming[k] == k) {
2585 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2586 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2587 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2588 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2589 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2590 } else {
2591 int x = v->BlendingAndTiming[k];
2592 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2593 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2594 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2595 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2596 if (!v->MaxVStartupLines[x])
2597 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2598 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2599 }
2600 }
2601
2602 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2603 v->NotEnoughUrgentLatencyHidingPre = false;
2604
2605 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2606 v->cursor_bw[k] = v->NumberOfCursors[k]
2607 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2608 / 8.0
2609 / (v->HTotal[k] / v->PixelClock[k])
2610 * v->VRatio[k];
2611 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2612 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2613 / 8.0
2614 / (v->HTotal[k] / v->PixelClock[k])
2615 * v->VRatioPrefetchY[k];
2616
2617 CalculateUrgentBurstFactor(
2618 v->swath_width_luma_ub[k],
2619 v->swath_width_chroma_ub[k],
2620 v->DETBufferSizeInKByte[0],
2621 v->SwathHeightY[k],
2622 v->SwathHeightC[k],
2623 v->HTotal[k] / v->PixelClock[k],
2624 v->UrgentLatency,
2625 v->CursorBufferSize,
2626 v->CursorWidth[k][0],
2627 v->CursorBPP[k][0],
2628 v->VRatio[k],
2629 v->VRatioChroma[k],
2630 v->BytePerPixelDETY[k],
2631 v->BytePerPixelDETC[k],
2632 v->DETBufferSizeY[k],
2633 v->DETBufferSizeC[k],
2634 &v->UrgentBurstFactorCursor[k],
2635 &v->UrgentBurstFactorLuma[k],
2636 &v->UrgentBurstFactorChroma[k],
2637 &v->NoUrgentLatencyHiding[k]);
2638
2639 CalculateUrgentBurstFactor(
2640 v->swath_width_luma_ub[k],
2641 v->swath_width_chroma_ub[k],
2642 v->DETBufferSizeInKByte[0],
2643 v->SwathHeightY[k],
2644 v->SwathHeightC[k],
2645 v->HTotal[k] / v->PixelClock[k],
2646 v->UrgentLatency,
2647 v->CursorBufferSize,
2648 v->CursorWidth[k][0],
2649 v->CursorBPP[k][0],
2650 v->VRatioPrefetchY[k],
2651 v->VRatioPrefetchC[k],
2652 v->BytePerPixelDETY[k],
2653 v->BytePerPixelDETC[k],
2654 v->DETBufferSizeY[k],
2655 v->DETBufferSizeC[k],
2656 &v->UrgentBurstFactorCursorPre[k],
2657 &v->UrgentBurstFactorLumaPre[k],
2658 &v->UrgentBurstFactorChromaPre[k],
2659 &v->NoUrgentLatencyHidingPre[k]);
2660
2661 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2662 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2663 v->ReadBandwidthPlaneLuma[k] *
2664 v->UrgentBurstFactorLuma[k] +
2665 v->ReadBandwidthPlaneChroma[k] *
2666 v->UrgentBurstFactorChroma[k] +
2667 v->cursor_bw[k] *
2668 v->UrgentBurstFactorCursor[k] +
2669 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2670 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2671 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2672 v->UrgentBurstFactorCursorPre[k]);
2673
2674 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2675 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2676 v->ReadBandwidthPlaneLuma[k] +
2677 v->ReadBandwidthPlaneChroma[k] +
2678 v->cursor_bw[k] +
2679 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2680 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2681
2682 if (v->DestinationLinesForPrefetch[k] < 2)
2683 DestinationLineTimesForPrefetchLessThan2 = true;
2684 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2685 VRatioPrefetchMoreThan4 = true;
2686 if (v->NoUrgentLatencyHiding[k] == true)
2687 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2688
2689 if (v->NoUrgentLatencyHidingPre[k] == true)
2690 v->NotEnoughUrgentLatencyHidingPre = true;
2691 }
2692 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2693
2694
2695 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2696 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2697 && !DestinationLineTimesForPrefetchLessThan2)
2698 v->PrefetchModeSupported = true;
2699 else {
2700 v->PrefetchModeSupported = false;
2701 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2702 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2703 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2704 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2705 }
2706
2707 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2708 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2709 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2710 v->BandwidthAvailableForImmediateFlip =
2711 v->BandwidthAvailableForImmediateFlip
2712 - dml_max(
2713 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2714 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2715 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2716 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2717 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2718 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2719 }
2720
2721 v->TotImmediateFlipBytes = 0;
2722 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2723 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2724 }
2725 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2726 CalculateFlipSchedule(
2727 mode_lib,
2728 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2729 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2730 v->UrgentExtraLatency,
2731 v->UrgentLatency,
2732 v->GPUVMMaxPageTableLevels,
2733 v->HostVMEnable,
2734 v->HostVMMaxNonCachedPageTableLevels,
2735 v->GPUVMEnable,
2736 v->HostVMMinPageSize,
2737 v->PDEAndMetaPTEBytesFrame[k],
2738 v->MetaRowByte[k],
2739 v->PixelPTEBytesPerRow[k],
2740 v->BandwidthAvailableForImmediateFlip,
2741 v->TotImmediateFlipBytes,
2742 v->SourcePixelFormat[k],
2743 v->HTotal[k] / v->PixelClock[k],
2744 v->VRatio[k],
2745 v->VRatioChroma[k],
2746 v->Tno_bw[k],
2747 v->DCCEnable[k],
2748 v->dpte_row_height[k],
2749 v->meta_row_height[k],
2750 v->dpte_row_height_chroma[k],
2751 v->meta_row_height_chroma[k],
2752 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2753 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2754 &v->final_flip_bw[k],
2755 &v->ImmediateFlipSupportedForPipe[k]);
2756 }
2757 v->total_dcn_read_bw_with_flip = 0.0;
2758 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2759 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2760 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2761 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2762 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2763 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2764 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2765 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2766 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2767 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2768 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2769 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2770 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2771 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2772 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2773 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2774 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2775
2776 }
2777 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2778
2779 v->ImmediateFlipSupported = true;
2780 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2781 v->ImmediateFlipSupported = false;
2782 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2783 }
2784 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2785 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2786 v->ImmediateFlipSupported = false;
2787 }
2788 }
2789 } else {
2790 v->ImmediateFlipSupported = false;
2791 }
2792
2793 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2794 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2795 v->PrefetchModeSupported = false;
2796 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2797 }
2798 }
2799
2800 v->VStartupLines = v->VStartupLines + 1;
2801 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2802 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2803 v->ImmediateFlipSupported)) ? true : false;
2804 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2805 ASSERT(v->PrefetchModeSupported);
2806
2807 //Watermarks and NB P-State/DRAM Clock Change Support
2808 {
2809 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2810 CalculateWatermarksAndDRAMSpeedChangeSupport(
2811 mode_lib,
2812 PrefetchMode,
2813 v->NumberOfActivePlanes,
2814 v->MaxLineBufferLines,
2815 v->LineBufferSize,
2816 v->DPPOutputBufferPixels,
2817 v->DETBufferSizeInKByte[0],
2818 v->WritebackInterfaceBufferSize,
2819 v->DCFCLK,
2820 v->ReturnBW,
2821 v->GPUVMEnable,
2822 v->dpte_group_bytes,
2823 v->MetaChunkSize,
2824 v->UrgentLatency,
2825 v->UrgentExtraLatency,
2826 v->WritebackLatency,
2827 v->WritebackChunkSize,
2828 v->SOCCLK,
2829 v->FinalDRAMClockChangeLatency,
2830 v->SRExitTime,
2831 v->SREnterPlusExitTime,
2832 v->DCFCLKDeepSleep,
2833 v->DPPPerPlane,
2834 v->DCCEnable,
2835 v->DPPCLK,
2836 v->DETBufferSizeY,
2837 v->DETBufferSizeC,
2838 v->SwathHeightY,
2839 v->SwathHeightC,
2840 v->LBBitPerPixel,
2841 v->SwathWidthY,
2842 v->SwathWidthC,
2843 v->HRatio,
2844 v->HRatioChroma,
2845 v->vtaps,
2846 v->VTAPsChroma,
2847 v->VRatio,
2848 v->VRatioChroma,
2849 v->HTotal,
2850 v->PixelClock,
2851 v->BlendingAndTiming,
2852 v->BytePerPixelDETY,
2853 v->BytePerPixelDETC,
2854 v->DSTXAfterScaler,
2855 v->DSTYAfterScaler,
2856 v->WritebackEnable,
2857 v->WritebackPixelFormat,
2858 v->WritebackDestinationWidth,
2859 v->WritebackDestinationHeight,
2860 v->WritebackSourceHeight,
2861 &DRAMClockChangeSupport,
2862 &v->UrgentWatermark,
2863 &v->WritebackUrgentWatermark,
2864 &v->DRAMClockChangeWatermark,
2865 &v->WritebackDRAMClockChangeWatermark,
2866 &v->StutterExitWatermark,
2867 &v->StutterEnterPlusExitWatermark,
2868 &v->MinActiveDRAMClockChangeLatencySupported);
2869
2870 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2871 if (v->WritebackEnable[k] == true) {
2872 if (v->BlendingAndTiming[k] == k) {
2873 v->ThisVStartup = v->VStartup[k];
2874 } else {
2875 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2876 if (v->BlendingAndTiming[k] == j) {
2877 v->ThisVStartup = v->VStartup[j];
2878 }
2879 }
2880 }
2881 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2882 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2883 } else {
2884 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2885 }
2886 }
2887
2888 }
2889
2890
2891 //Display Pipeline Delivery Time in Prefetch, Groups
2892 CalculatePixelDeliveryTimes(
2893 v->NumberOfActivePlanes,
2894 v->VRatio,
2895 v->VRatioChroma,
2896 v->VRatioPrefetchY,
2897 v->VRatioPrefetchC,
2898 v->swath_width_luma_ub,
2899 v->swath_width_chroma_ub,
2900 v->DPPPerPlane,
2901 v->HRatio,
2902 v->HRatioChroma,
2903 v->PixelClock,
2904 v->PSCL_THROUGHPUT_LUMA,
2905 v->PSCL_THROUGHPUT_CHROMA,
2906 v->DPPCLK,
2907 v->BytePerPixelC,
2908 v->SourceScan,
2909 v->NumberOfCursors,
2910 v->CursorWidth,
2911 v->CursorBPP,
2912 v->BlockWidth256BytesY,
2913 v->BlockHeight256BytesY,
2914 v->BlockWidth256BytesC,
2915 v->BlockHeight256BytesC,
2916 v->DisplayPipeLineDeliveryTimeLuma,
2917 v->DisplayPipeLineDeliveryTimeChroma,
2918 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2919 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2920 v->DisplayPipeRequestDeliveryTimeLuma,
2921 v->DisplayPipeRequestDeliveryTimeChroma,
2922 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2923 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2924 v->CursorRequestDeliveryTime,
2925 v->CursorRequestDeliveryTimePrefetch);
2926
2927 CalculateMetaAndPTETimes(
2928 v->NumberOfActivePlanes,
2929 v->GPUVMEnable,
2930 v->MetaChunkSize,
2931 v->MinMetaChunkSizeBytes,
2932 v->HTotal,
2933 v->VRatio,
2934 v->VRatioChroma,
2935 v->DestinationLinesToRequestRowInVBlank,
2936 v->DestinationLinesToRequestRowInImmediateFlip,
2937 v->DCCEnable,
2938 v->PixelClock,
2939 v->BytePerPixelY,
2940 v->BytePerPixelC,
2941 v->SourceScan,
2942 v->dpte_row_height,
2943 v->dpte_row_height_chroma,
2944 v->meta_row_width,
2945 v->meta_row_width_chroma,
2946 v->meta_row_height,
2947 v->meta_row_height_chroma,
2948 v->meta_req_width,
2949 v->meta_req_width_chroma,
2950 v->meta_req_height,
2951 v->meta_req_height_chroma,
2952 v->dpte_group_bytes,
2953 v->PTERequestSizeY,
2954 v->PTERequestSizeC,
2955 v->PixelPTEReqWidthY,
2956 v->PixelPTEReqHeightY,
2957 v->PixelPTEReqWidthC,
2958 v->PixelPTEReqHeightC,
2959 v->dpte_row_width_luma_ub,
2960 v->dpte_row_width_chroma_ub,
2961 v->DST_Y_PER_PTE_ROW_NOM_L,
2962 v->DST_Y_PER_PTE_ROW_NOM_C,
2963 v->DST_Y_PER_META_ROW_NOM_L,
2964 v->DST_Y_PER_META_ROW_NOM_C,
2965 v->TimePerMetaChunkNominal,
2966 v->TimePerChromaMetaChunkNominal,
2967 v->TimePerMetaChunkVBlank,
2968 v->TimePerChromaMetaChunkVBlank,
2969 v->TimePerMetaChunkFlip,
2970 v->TimePerChromaMetaChunkFlip,
2971 v->time_per_pte_group_nom_luma,
2972 v->time_per_pte_group_vblank_luma,
2973 v->time_per_pte_group_flip_luma,
2974 v->time_per_pte_group_nom_chroma,
2975 v->time_per_pte_group_vblank_chroma,
2976 v->time_per_pte_group_flip_chroma);
2977
2978 CalculateVMGroupAndRequestTimes(
2979 v->NumberOfActivePlanes,
2980 v->GPUVMEnable,
2981 v->GPUVMMaxPageTableLevels,
2982 v->HTotal,
2983 v->BytePerPixelC,
2984 v->DestinationLinesToRequestVMInVBlank,
2985 v->DestinationLinesToRequestVMInImmediateFlip,
2986 v->DCCEnable,
2987 v->PixelClock,
2988 v->dpte_row_width_luma_ub,
2989 v->dpte_row_width_chroma_ub,
2990 v->vm_group_bytes,
2991 v->dpde0_bytes_per_frame_ub_l,
2992 v->dpde0_bytes_per_frame_ub_c,
2993 v->meta_pte_bytes_per_frame_ub_l,
2994 v->meta_pte_bytes_per_frame_ub_c,
2995 v->TimePerVMGroupVBlank,
2996 v->TimePerVMGroupFlip,
2997 v->TimePerVMRequestVBlank,
2998 v->TimePerVMRequestFlip);
2999
3000
3001 // Min TTUVBlank
3002 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3003 if (PrefetchMode == 0) {
3004 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3005 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3006 v->MinTTUVBlank[k] = dml_max(
3007 v->DRAMClockChangeWatermark,
3008 dml_max(
3009 v->StutterEnterPlusExitWatermark,
3010 v->UrgentWatermark));
3011 } else if (PrefetchMode == 1) {
3012 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3013 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3014 v->MinTTUVBlank[k] = dml_max(
3015 v->StutterEnterPlusExitWatermark,
3016 v->UrgentWatermark);
3017 } else {
3018 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3019 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3020 v->MinTTUVBlank[k] = v->UrgentWatermark;
3021 }
3022 if (!v->DynamicMetadataEnable[k])
3023 v->MinTTUVBlank[k] = v->TCalc
3024 + v->MinTTUVBlank[k];
3025 }
3026
3027 // DCC Configuration
3028 v->ActiveDPPs = 0;
3029 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3030 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3031 v->SourcePixelFormat[k],
3032 v->SurfaceWidthY[k],
3033 v->SurfaceWidthC[k],
3034 v->SurfaceHeightY[k],
3035 v->SurfaceHeightC[k],
3036 v->DETBufferSizeInKByte[0] * 1024,
3037 v->BlockHeight256BytesY[k],
3038 v->BlockHeight256BytesC[k],
3039 v->SurfaceTiling[k],
3040 v->BytePerPixelY[k],
3041 v->BytePerPixelC[k],
3042 v->BytePerPixelDETY[k],
3043 v->BytePerPixelDETC[k],
3044 v->SourceScan[k],
3045 &v->DCCYMaxUncompressedBlock[k],
3046 &v->DCCCMaxUncompressedBlock[k],
3047 &v->DCCYMaxCompressedBlock[k],
3048 &v->DCCCMaxCompressedBlock[k],
3049 &v->DCCYIndependentBlock[k],
3050 &v->DCCCIndependentBlock[k]);
3051 }
3052
3053 {
3054 //Maximum Bandwidth Used
3055 double TotalWRBandwidth = 0;
3056 double MaxPerPlaneVActiveWRBandwidth = 0;
3057 double WRBandwidth = 0;
3058 double MaxUsedBW = 0;
3059 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3060 if (v->WritebackEnable[k] == true
3061 && v->WritebackPixelFormat[k] == dm_444_32) {
3062 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3063 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3064 } else if (v->WritebackEnable[k] == true) {
3065 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3066 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3067 }
3068 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3069 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3070 }
3071
3072 v->TotalDataReadBandwidth = 0;
3073 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3074 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
3075 + v->ReadBandwidthPlaneLuma[k]
3076 + v->ReadBandwidthPlaneChroma[k];
3077 }
3078
3079 {
3080 double MaxPerPlaneVActiveRDBandwidth = 0;
3081 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3082 MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth,
3083 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
3084
3085 }
3086 }
3087
3088 MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth;
3089 }
3090
3091 // VStartup Margin
3092 v->VStartupMargin = 0;
3093 v->FirstMainPlane = true;
3094 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3095 if (v->BlendingAndTiming[k] == k) {
3096 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
3097 / v->PixelClock[k];
3098 if (v->FirstMainPlane == true) {
3099 v->VStartupMargin = margin;
3100 v->FirstMainPlane = false;
3101 } else {
3102 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
3103 }
3104 }
3105 }
3106
3107 // Stutter Efficiency
3108 CalculateStutterEfficiency(
3109 v->NumberOfActivePlanes,
3110 v->ROBBufferSizeInKByte,
3111 v->TotalDataReadBandwidth,
3112 v->DCFCLK,
3113 v->ReturnBW,
3114 v->SRExitTime,
3115 v->SynchronizedVBlank,
3116 v->DPPPerPlane,
3117 v->DETBufferSizeY,
3118 v->BytePerPixelY,
3119 v->BytePerPixelDETY,
3120 v->SwathWidthY,
3121 v->SwathHeightY,
3122 v->SwathHeightC,
3123 v->DCCRateLuma,
3124 v->DCCRateChroma,
3125 v->HTotal,
3126 v->VTotal,
3127 v->PixelClock,
3128 v->VRatio,
3129 v->SourceScan,
3130 v->BlockHeight256BytesY,
3131 v->BlockWidth256BytesY,
3132 v->BlockHeight256BytesC,
3133 v->BlockWidth256BytesC,
3134 v->DCCYMaxUncompressedBlock,
3135 v->DCCCMaxUncompressedBlock,
3136 v->VActive,
3137 v->DCCEnable,
3138 v->WritebackEnable,
3139 v->ReadBandwidthPlaneLuma,
3140 v->ReadBandwidthPlaneChroma,
3141 v->meta_row_bw,
3142 v->dpte_row_bw,
3143 &v->StutterEfficiencyNotIncludingVBlank,
3144 &v->StutterEfficiency,
3145 &v->StutterPeriod);
3146 }
3147
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)3148 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3149 {
3150 // Display Pipe Configuration
3151 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3152 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3153 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3154 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
3155 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3156 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3157 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3158 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
3159 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3160 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3161 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3162 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3163 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3164 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3165 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3166 bool dummysinglestring = 0;
3167 unsigned int k;
3168
3169 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3170
3171 CalculateBytePerPixelAnd256BBlockSizes(
3172 mode_lib->vba.SourcePixelFormat[k],
3173 mode_lib->vba.SurfaceTiling[k],
3174 &BytePerPixY[k],
3175 &BytePerPixC[k],
3176 &BytePerPixDETY[k],
3177 &BytePerPixDETC[k],
3178 &Read256BytesBlockHeightY[k],
3179 &Read256BytesBlockHeightC[k],
3180 &Read256BytesBlockWidthY[k],
3181 &Read256BytesBlockWidthC[k]);
3182 }
3183 CalculateSwathAndDETConfiguration(
3184 false,
3185 mode_lib->vba.NumberOfActivePlanes,
3186 mode_lib->vba.DETBufferSizeInKByte[0],
3187 dummy1,
3188 dummy2,
3189 mode_lib->vba.SourceScan,
3190 mode_lib->vba.SourcePixelFormat,
3191 mode_lib->vba.SurfaceTiling,
3192 mode_lib->vba.ViewportWidth,
3193 mode_lib->vba.ViewportHeight,
3194 mode_lib->vba.SurfaceWidthY,
3195 mode_lib->vba.SurfaceWidthC,
3196 mode_lib->vba.SurfaceHeightY,
3197 mode_lib->vba.SurfaceHeightC,
3198 Read256BytesBlockHeightY,
3199 Read256BytesBlockHeightC,
3200 Read256BytesBlockWidthY,
3201 Read256BytesBlockWidthC,
3202 mode_lib->vba.ODMCombineEnabled,
3203 mode_lib->vba.BlendingAndTiming,
3204 BytePerPixY,
3205 BytePerPixC,
3206 BytePerPixDETY,
3207 BytePerPixDETC,
3208 mode_lib->vba.HActive,
3209 mode_lib->vba.HRatio,
3210 mode_lib->vba.HRatioChroma,
3211 mode_lib->vba.DPPPerPlane,
3212 dummy5,
3213 dummy6,
3214 dummy3,
3215 dummy4,
3216 mode_lib->vba.SwathHeightY,
3217 mode_lib->vba.SwathHeightC,
3218 mode_lib->vba.DETBufferSizeY,
3219 mode_lib->vba.DETBufferSizeC,
3220 dummy7,
3221 &dummysinglestring);
3222 }
3223
CalculateBytePerPixelAnd256BBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC)3224 static bool CalculateBytePerPixelAnd256BBlockSizes(
3225 enum source_format_class SourcePixelFormat,
3226 enum dm_swizzle_mode SurfaceTiling,
3227 unsigned int *BytePerPixelY,
3228 unsigned int *BytePerPixelC,
3229 double *BytePerPixelDETY,
3230 double *BytePerPixelDETC,
3231 unsigned int *BlockHeight256BytesY,
3232 unsigned int *BlockHeight256BytesC,
3233 unsigned int *BlockWidth256BytesY,
3234 unsigned int *BlockWidth256BytesC)
3235 {
3236 if (SourcePixelFormat == dm_444_64) {
3237 *BytePerPixelDETY = 8;
3238 *BytePerPixelDETC = 0;
3239 *BytePerPixelY = 8;
3240 *BytePerPixelC = 0;
3241 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3242 *BytePerPixelDETY = 4;
3243 *BytePerPixelDETC = 0;
3244 *BytePerPixelY = 4;
3245 *BytePerPixelC = 0;
3246 } else if (SourcePixelFormat == dm_444_16) {
3247 *BytePerPixelDETY = 2;
3248 *BytePerPixelDETC = 0;
3249 *BytePerPixelY = 2;
3250 *BytePerPixelC = 0;
3251 } else if (SourcePixelFormat == dm_444_8) {
3252 *BytePerPixelDETY = 1;
3253 *BytePerPixelDETC = 0;
3254 *BytePerPixelY = 1;
3255 *BytePerPixelC = 0;
3256 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3257 *BytePerPixelDETY = 4;
3258 *BytePerPixelDETC = 1;
3259 *BytePerPixelY = 4;
3260 *BytePerPixelC = 1;
3261 } else if (SourcePixelFormat == dm_420_8) {
3262 *BytePerPixelDETY = 1;
3263 *BytePerPixelDETC = 2;
3264 *BytePerPixelY = 1;
3265 *BytePerPixelC = 2;
3266 } else if (SourcePixelFormat == dm_420_12) {
3267 *BytePerPixelDETY = 2;
3268 *BytePerPixelDETC = 4;
3269 *BytePerPixelY = 2;
3270 *BytePerPixelC = 4;
3271 } else {
3272 *BytePerPixelDETY = 4.0 / 3;
3273 *BytePerPixelDETC = 8.0 / 3;
3274 *BytePerPixelY = 2;
3275 *BytePerPixelC = 4;
3276 }
3277
3278 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3279 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3280 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3281 || SourcePixelFormat == dm_rgbe)) {
3282 if (SurfaceTiling == dm_sw_linear) {
3283 *BlockHeight256BytesY = 1;
3284 } else if (SourcePixelFormat == dm_444_64) {
3285 *BlockHeight256BytesY = 4;
3286 } else if (SourcePixelFormat == dm_444_8) {
3287 *BlockHeight256BytesY = 16;
3288 } else {
3289 *BlockHeight256BytesY = 8;
3290 }
3291 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3292 *BlockHeight256BytesC = 0;
3293 *BlockWidth256BytesC = 0;
3294 } else {
3295 if (SurfaceTiling == dm_sw_linear) {
3296 *BlockHeight256BytesY = 1;
3297 *BlockHeight256BytesC = 1;
3298 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3299 *BlockHeight256BytesY = 8;
3300 *BlockHeight256BytesC = 16;
3301 } else if (SourcePixelFormat == dm_420_8) {
3302 *BlockHeight256BytesY = 16;
3303 *BlockHeight256BytesC = 8;
3304 } else {
3305 *BlockHeight256BytesY = 8;
3306 *BlockHeight256BytesC = 8;
3307 }
3308 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3309 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3310 }
3311 return true;
3312 }
3313
/*
 * CalculateTWait - pick the wait time for the given prefetch mode.
 *
 *   PrefetchMode 0: the larger of (DRAMClockChangeLatency + UrgentLatency)
 *                   and max(SREnterPlusExitTime, UrgentLatency).
 *   PrefetchMode 1: max(SREnterPlusExitTime, UrgentLatency).
 *   any other mode: UrgentLatency.
 *
 * Returns the selected value; nothing else is modified.
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3329
/*
 * dml30_CalculateWriteBackDISPCLK - DISPCLK value derived from the writeback
 * scaler settings.
 *
 * Three candidate clocks are computed from the pixel clock:
 *   - one from the horizontal tap count (in groups of 8, rounded up) divided
 *     by the horizontal ratio,
 *   - one from the vertical tap count and the destination width (in groups
 *     of 6, rounded up) divided by HTotal,
 *   - one from the vertical tap count, destination width and line buffer
 *     size divided by the source width.
 * The largest of the three is returned.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but do not take part
 * in the computation.
 */
double dml30_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		long WritebackSourceWidth,
		long WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize)
{
	const double clk_htaps = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	const double clk_vtaps = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
	const double clk_linebuf = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;

	return dml_max3(clk_htaps, clk_vtaps, clk_linebuf);
}
3349
/*
 * CalculateWriteBackDelay - writeback delay from the scaler geometry.
 *
 * A line length is taken as the larger of the destination width and
 * ceil(destination width / 6) * vertical taps.  The number of output lines
 * left after the last source line is
 *   destination height - 1 - ceil((source height - v_init) / vertical ratio)
 * with v_init = (vertical ratio + vertical taps + 1) / 2.
 *
 * Returns 0 when that line count is negative; otherwise
 *   line count * line length + (HTotal - destination width) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but do not take part
 * in the computation.
 */
static double CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		long WritebackDestinationWidth,
		long WritebackDestinationHeight,
		long WritebackSourceHeight,
		unsigned int HTotal)
{
	const double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double line_length = dml_max((double) WritebackDestinationWidth,
			dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
	const double trailing_lines = WritebackDestinationHeight - 1
			- dml_ceil((WritebackSourceHeight - v_init) / WritebackVRatio, 1);

	/* A negative line count means there is no delay to report. */
	if (trailing_lines < 0)
		return 0;

	return trailing_lines * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
3375
3376
/*
 * CalculateDynamicMetadataParameters - derive the four dynamic-metadata
 * timing outputs from the clocks and the frame timing.
 *
 * Outputs (all written unconditionally):
 *   *Tsetup - (VUpdate offset + VUpdate width + VReady offset, all in pixels)
 *             divided by PixelClock.
 *   *Tdmbf  - DynamicMetadataTransmittedBytes / 4 / DISPCLK.
 *   *Tdmec  - HTotal / PixelClock.
 *   *Tdmsks - half of VBlank * HTotal / PixelClock when
 *             DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *             DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *             the result is halved again when InterlaceEnable is 1 and
 *             ProgressiveToInterlaceUnitInOPP is false.
 */
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
		double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
		long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
		double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	const double vupdate_width_pix = (14 / DCFClkDeepSleep + 12 / DPPCLK + repeater_delay) * PixelClock;
	const double vready_offset_pix = dml_max(150.0 / DPPCLK, repeater_delay + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
	const double vupdate_offset_pix = dml_ceil(HTotal / 4.0, 1);
	double skew;

	*Tsetup = (vupdate_offset_pix + vupdate_width_pix + vready_offset_pix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		skew = VBlank * HTotal / PixelClock / 2.0;
	else
		skew = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		skew = skew / 2;

	/* Stored last, matching the original order of output writes. */
	*Tdmsks = skew;
}
3402
CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3403 static void CalculateRowBandwidth(
3404 bool GPUVMEnable,
3405 enum source_format_class SourcePixelFormat,
3406 double VRatio,
3407 double VRatioChroma,
3408 bool DCCEnable,
3409 double LineTime,
3410 unsigned int MetaRowByteLuma,
3411 unsigned int MetaRowByteChroma,
3412 unsigned int meta_row_height_luma,
3413 unsigned int meta_row_height_chroma,
3414 unsigned int PixelPTEBytesPerRowLuma,
3415 unsigned int PixelPTEBytesPerRowChroma,
3416 unsigned int dpte_row_height_luma,
3417 unsigned int dpte_row_height_chroma,
3418 double *meta_row_bw,
3419 double *dpte_row_bw)
3420 {
3421 if (DCCEnable != true) {
3422 *meta_row_bw = 0;
3423 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3424 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3425 + VRatioChroma * MetaRowByteChroma
3426 / (meta_row_height_chroma * LineTime);
3427 } else {
3428 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3429 }
3430
3431 if (GPUVMEnable != true) {
3432 *dpte_row_bw = 0;
3433 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3434 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3435 + VRatioChroma * PixelPTEBytesPerRowChroma
3436 / (dpte_row_height_chroma * LineTime);
3437 } else {
3438 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3439 }
3440 }
3441
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3442 static void CalculateFlipSchedule(
3443 struct display_mode_lib *mode_lib,
3444 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3445 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3446 double UrgentExtraLatency,
3447 double UrgentLatency,
3448 unsigned int GPUVMMaxPageTableLevels,
3449 bool HostVMEnable,
3450 unsigned int HostVMMaxNonCachedPageTableLevels,
3451 bool GPUVMEnable,
3452 double HostVMMinPageSize,
3453 double PDEAndMetaPTEBytesPerFrame,
3454 double MetaRowBytes,
3455 double DPTEBytesPerRow,
3456 double BandwidthAvailableForImmediateFlip,
3457 unsigned int TotImmediateFlipBytes,
3458 enum source_format_class SourcePixelFormat,
3459 double LineTime,
3460 double VRatio,
3461 double VRatioChroma,
3462 double Tno_bw,
3463 bool DCCEnable,
3464 unsigned int dpte_row_height,
3465 unsigned int meta_row_height,
3466 unsigned int dpte_row_height_chroma,
3467 unsigned int meta_row_height_chroma,
3468 double *DestinationLinesToRequestVMInImmediateFlip,
3469 double *DestinationLinesToRequestRowInImmediateFlip,
3470 double *final_flip_bw,
3471 bool *ImmediateFlipSupportedForPipe)
3472 {
3473 double min_row_time = 0.0;
3474 unsigned int HostVMDynamicLevelsTrips = 0;
3475 double TimeForFetchingMetaPTEImmediateFlip = 0;
3476 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3477 double ImmediateFlipBW = 0;
3478 double HostVMInefficiencyFactor = 0;
3479
3480 if (GPUVMEnable == true && HostVMEnable == true) {
3481 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3482 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3483 } else {
3484 HostVMInefficiencyFactor = 1;
3485 HostVMDynamicLevelsTrips = 0;
3486 }
3487
3488 if (GPUVMEnable == true || DCCEnable == true) {
3489 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3490 }
3491
3492 if (GPUVMEnable == true) {
3493 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3494 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3495 } else {
3496 TimeForFetchingMetaPTEImmediateFlip = 0;
3497 }
3498
3499 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3500 if ((GPUVMEnable == true || DCCEnable == true)) {
3501 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3502 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3503 } else {
3504 TimeForFetchingRowInVBlankImmediateFlip = 0;
3505 }
3506
3507 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3508
3509 if (GPUVMEnable == true) {
3510 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3511 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3512 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3513 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
3514 } else {
3515 *final_flip_bw = 0;
3516 }
3517
3518
3519 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3520 if (GPUVMEnable == true && DCCEnable != true) {
3521 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3522 } else if (GPUVMEnable != true && DCCEnable == true) {
3523 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3524 } else {
3525 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3526 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
3527 }
3528 } else {
3529 if (GPUVMEnable == true && DCCEnable != true) {
3530 min_row_time = dpte_row_height * LineTime / VRatio;
3531 } else if (GPUVMEnable != true && DCCEnable == true) {
3532 min_row_time = meta_row_height * LineTime / VRatio;
3533 } else {
3534 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
3535 }
3536 }
3537
3538 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3539 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3540 *ImmediateFlipSupportedForPipe = false;
3541 } else {
3542 *ImmediateFlipSupportedForPipe = true;
3543 }
3544 }
3545
TruncToValidBPP(double LinkBitRate,int Lanes,long HTotal,long HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,int DSCSlices,int AudioRate,int AudioLayout,enum odm_combine_mode ODMCombine)3546 static double TruncToValidBPP(
3547 double LinkBitRate,
3548 int Lanes,
3549 long HTotal,
3550 long HActive,
3551 double PixelClock,
3552 double DesiredBPP,
3553 bool DSCEnable,
3554 enum output_encoder_class Output,
3555 enum output_format_class Format,
3556 unsigned int DSCInputBitPerComponent,
3557 int DSCSlices,
3558 int AudioRate,
3559 int AudioLayout,
3560 enum odm_combine_mode ODMCombine)
3561 {
3562 double MaxLinkBPP = 0;
3563 int MinDSCBPP = 0;
3564 double MaxDSCBPP = 0;
3565 int NonDSCBPP0 = 0;
3566 int NonDSCBPP1 = 0;
3567 int NonDSCBPP2 = 0;
3568
3569 if (Format == dm_420) {
3570 NonDSCBPP0 = 12;
3571 NonDSCBPP1 = 15;
3572 NonDSCBPP2 = 18;
3573 MinDSCBPP = 6;
3574 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3575 } else if (Format == dm_444) {
3576 NonDSCBPP0 = 24;
3577 NonDSCBPP1 = 30;
3578 NonDSCBPP2 = 36;
3579 MinDSCBPP = 8;
3580 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3581 } else {
3582 if (Output == dm_hdmi) {
3583 NonDSCBPP0 = 24;
3584 NonDSCBPP1 = 24;
3585 NonDSCBPP2 = 24;
3586 }
3587 else {
3588 NonDSCBPP0 = 16;
3589 NonDSCBPP1 = 20;
3590 NonDSCBPP2 = 24;
3591 }
3592
3593 if (Format == dm_n422) {
3594 MinDSCBPP = 7;
3595 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3596 }
3597 else {
3598 MinDSCBPP = 8;
3599 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3600 }
3601 }
3602
3603 if (DSCEnable && Output == dm_dp) {
3604 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3605 } else {
3606 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3607 }
3608
3609 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3610 MaxLinkBPP = 16;
3611 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3612 MaxLinkBPP = 32;
3613 }
3614
3615
3616 if (DesiredBPP == 0) {
3617 if (DSCEnable) {
3618 if (MaxLinkBPP < MinDSCBPP) {
3619 return BPP_INVALID;
3620 } else if (MaxLinkBPP >= MaxDSCBPP) {
3621 return MaxDSCBPP;
3622 } else {
3623 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3624 }
3625 } else {
3626 if (MaxLinkBPP >= NonDSCBPP2) {
3627 return NonDSCBPP2;
3628 } else if (MaxLinkBPP >= NonDSCBPP1) {
3629 return NonDSCBPP1;
3630 } else if (MaxLinkBPP >= NonDSCBPP0) {
3631 return NonDSCBPP0;
3632 } else {
3633 return BPP_INVALID;
3634 }
3635 }
3636 } else {
3637 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3638 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3639 return BPP_INVALID;
3640 } else {
3641 return DesiredBPP;
3642 }
3643 }
3644 return BPP_INVALID;
3645 }
3646
dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3647 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3648 {
3649 struct vba_vars_st *v = &mode_lib->vba;
3650 int MinPrefetchMode, MaxPrefetchMode;
3651 int i;
3652 unsigned int j, k, m;
3653 bool EnoughWritebackUnits = true;
3654 bool WritebackModeSupport = true;
3655 bool ViewportExceedsSurface = false;
3656 double MaxTotalVActiveRDBandwidth = 0;
3657 long ReorderingBytes = 0;
3658 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3659
3660 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3661
3662 CalculateMinAndMaxPrefetchMode(
3663 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3664 &MinPrefetchMode, &MaxPrefetchMode);
3665
3666 /*Scale Ratio, taps Support Check*/
3667
3668 v->ScaleRatioAndTapsSupport = true;
3669 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3670 if (v->ScalerEnabled[k] == false
3671 && ((v->SourcePixelFormat[k] != dm_444_64
3672 && v->SourcePixelFormat[k] != dm_444_32
3673 && v->SourcePixelFormat[k] != dm_444_16
3674 && v->SourcePixelFormat[k] != dm_mono_16
3675 && v->SourcePixelFormat[k] != dm_mono_8
3676 && v->SourcePixelFormat[k] != dm_rgbe
3677 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3678 || v->HRatio[k] != 1.0
3679 || v->htaps[k] != 1.0
3680 || v->VRatio[k] != 1.0
3681 || v->vtaps[k] != 1.0)) {
3682 v->ScaleRatioAndTapsSupport = false;
3683 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3684 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3685 || (v->htaps[k] > 1.0
3686 && (v->htaps[k] % 2) == 1)
3687 || v->HRatio[k] > v->MaxHSCLRatio
3688 || v->VRatio[k] > v->MaxVSCLRatio
3689 || v->HRatio[k] > v->htaps[k]
3690 || v->VRatio[k] > v->vtaps[k]
3691 || (v->SourcePixelFormat[k] != dm_444_64
3692 && v->SourcePixelFormat[k] != dm_444_32
3693 && v->SourcePixelFormat[k] != dm_444_16
3694 && v->SourcePixelFormat[k] != dm_mono_16
3695 && v->SourcePixelFormat[k] != dm_mono_8
3696 && v->SourcePixelFormat[k] != dm_rgbe
3697 && (v->VTAPsChroma[k] < 1
3698 || v->VTAPsChroma[k] > 8
3699 || v->HTAPsChroma[k] < 1
3700 || v->HTAPsChroma[k] > 8
3701 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3702 || v->HRatioChroma[k] > v->MaxHSCLRatio
3703 || v->VRatioChroma[k] > v->MaxVSCLRatio
3704 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3705 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3706 v->ScaleRatioAndTapsSupport = false;
3707 }
3708 }
3709 /*Source Format, Pixel Format and Scan Support Check*/
3710
3711 v->SourceFormatPixelAndScanSupport = true;
3712 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3713 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3714 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3715 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3716 v->SourceFormatPixelAndScanSupport = false;
3717 }
3718 }
3719 /*Bandwidth Support Check*/
3720
3721 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3722 CalculateBytePerPixelAnd256BBlockSizes(
3723 v->SourcePixelFormat[k],
3724 v->SurfaceTiling[k],
3725 &v->BytePerPixelY[k],
3726 &v->BytePerPixelC[k],
3727 &v->BytePerPixelInDETY[k],
3728 &v->BytePerPixelInDETC[k],
3729 &v->Read256BlockHeightY[k],
3730 &v->Read256BlockHeightC[k],
3731 &v->Read256BlockWidthY[k],
3732 &v->Read256BlockWidthC[k]);
3733 }
3734 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3735 if (v->SourceScan[k] != dm_vert) {
3736 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3737 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3738 } else {
3739 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3740 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3741 }
3742 }
3743 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3744 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3745 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3746 }
3747 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3748 if (v->WritebackEnable[k] == true
3749 && v->WritebackPixelFormat[k] == dm_444_64) {
3750 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3751 * v->WritebackDestinationHeight[k]
3752 / (v->WritebackSourceHeight[k]
3753 * v->HTotal[k]
3754 / v->PixelClock[k]) * 8.0;
3755 } else if (v->WritebackEnable[k] == true) {
3756 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3757 * v->WritebackDestinationHeight[k]
3758 / (v->WritebackSourceHeight[k]
3759 * v->HTotal[k]
3760 / v->PixelClock[k]) * 4.0;
3761 } else {
3762 v->WriteBandwidth[k] = 0.0;
3763 }
3764 }
3765
3766 /*Writeback Latency support check*/
3767
3768 v->WritebackLatencySupport = true;
3769 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3770 if (v->WritebackEnable[k] == true) {
3771 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3772 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3773 if (v->WriteBandwidth[k]
3774 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3775 / v->WritebackLatency) {
3776 v->WritebackLatencySupport = false;
3777 }
3778 } else {
3779 if (v->WriteBandwidth[k]
3780 > v->WritebackInterfaceBufferSize * 1024
3781 / v->WritebackLatency) {
3782 v->WritebackLatencySupport = false;
3783 }
3784 }
3785 }
3786 }
3787
3788 /*Writeback Mode Support Check*/
3789
3790 v->TotalNumberOfActiveWriteback = 0;
3791 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3792 if (v->WritebackEnable[k] == true) {
3793 v->TotalNumberOfActiveWriteback =
3794 v->TotalNumberOfActiveWriteback + 1;
3795 }
3796 }
3797
3798 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3799 EnoughWritebackUnits = false;
3800 }
3801 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3802 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3803 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3804
3805 WritebackModeSupport = false;
3806 }
3807 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3808 WritebackModeSupport = false;
3809 }
3810
3811 /*Writeback Scale Ratio and Taps Support Check*/
3812
3813 v->WritebackScaleRatioAndTapsSupport = true;
3814 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3815 if (v->WritebackEnable[k] == true) {
3816 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3817 || v->WritebackVRatio[k]
3818 > v->WritebackMaxVSCLRatio
3819 || v->WritebackHRatio[k]
3820 < v->WritebackMinHSCLRatio
3821 || v->WritebackVRatio[k]
3822 < v->WritebackMinVSCLRatio
3823 || v->WritebackHTaps[k]
3824 > v->WritebackMaxHSCLTaps
3825 || v->WritebackVTaps[k]
3826 > v->WritebackMaxVSCLTaps
3827 || v->WritebackHRatio[k]
3828 > v->WritebackHTaps[k]
3829 || v->WritebackVRatio[k]
3830 > v->WritebackVTaps[k]
3831 || (v->WritebackHTaps[k] > 2.0
3832 && ((v->WritebackHTaps[k] % 2)
3833 == 1))) {
3834 v->WritebackScaleRatioAndTapsSupport = false;
3835 }
3836 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3837 v->WritebackScaleRatioAndTapsSupport = false;
3838 }
3839 }
3840 }
3841 /*Maximum DISPCLK/DPPCLK Support check*/
3842
3843 v->WritebackRequiredDISPCLK = 0.0;
3844 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3845 if (v->WritebackEnable[k] == true) {
3846 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3847 dml30_CalculateWriteBackDISPCLK(
3848 v->WritebackPixelFormat[k],
3849 v->PixelClock[k],
3850 v->WritebackHRatio[k],
3851 v->WritebackVRatio[k],
3852 v->WritebackHTaps[k],
3853 v->WritebackVTaps[k],
3854 v->WritebackSourceWidth[k],
3855 v->WritebackDestinationWidth[k],
3856 v->HTotal[k],
3857 v->WritebackLineBufferSize));
3858 }
3859 }
3860 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3861 if (v->HRatio[k] > 1.0) {
3862 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3863 } else {
3864 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3865 }
3866 if (v->BytePerPixelC[k] == 0.0) {
3867 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3868 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3869 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3870 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3871 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3872 }
3873 } else {
3874 if (v->HRatioChroma[k] > 1.0) {
3875 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3876 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3877 } else {
3878 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3879 }
3880 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3881 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3882 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3883 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3884 1.0);
3885 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3886 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3887 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3888 }
3889 }
3890 }
3891 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3892 int MaximumSwathWidthSupportLuma = 0;
3893 int MaximumSwathWidthSupportChroma = 0;
3894
3895 if (v->SurfaceTiling[k] == dm_sw_linear) {
3896 MaximumSwathWidthSupportLuma = 8192.0;
3897 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3898 MaximumSwathWidthSupportLuma = 2880.0;
3899 } else {
3900 MaximumSwathWidthSupportLuma = 5760.0;
3901 }
3902
3903 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3904 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3905 } else {
3906 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3907 }
3908 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3909 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3910 if (v->BytePerPixelC[k] == 0.0) {
3911 v->MaximumSwathWidthInLineBufferChroma = 0;
3912 } else {
3913 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3914 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3915 }
3916 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3917 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3918 }
3919
3920 CalculateSwathAndDETConfiguration(
3921 true,
3922 v->NumberOfActivePlanes,
3923 v->DETBufferSizeInKByte[0],
3924 v->MaximumSwathWidthLuma,
3925 v->MaximumSwathWidthChroma,
3926 v->SourceScan,
3927 v->SourcePixelFormat,
3928 v->SurfaceTiling,
3929 v->ViewportWidth,
3930 v->ViewportHeight,
3931 v->SurfaceWidthY,
3932 v->SurfaceWidthC,
3933 v->SurfaceHeightY,
3934 v->SurfaceHeightC,
3935 v->Read256BlockHeightY,
3936 v->Read256BlockHeightC,
3937 v->Read256BlockWidthY,
3938 v->Read256BlockWidthC,
3939 v->odm_combine_dummy,
3940 v->BlendingAndTiming,
3941 v->BytePerPixelY,
3942 v->BytePerPixelC,
3943 v->BytePerPixelInDETY,
3944 v->BytePerPixelInDETC,
3945 v->HActive,
3946 v->HRatio,
3947 v->HRatioChroma,
3948 v->DPPPerPlane,
3949 v->swath_width_luma_ub,
3950 v->swath_width_chroma_ub,
3951 v->SwathWidthY,
3952 v->SwathWidthC,
3953 v->SwathHeightY,
3954 v->SwathHeightC,
3955 v->DETBufferSizeY,
3956 v->DETBufferSizeC,
3957 v->SingleDPPViewportSizeSupportPerPlane,
3958 &v->ViewportSizeSupport[0][0]);
3959
3960 for (i = 0; i < v->soc.num_states; i++) {
3961 for (j = 0; j < 2; j++) {
3962 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3963 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3964 v->RequiredDISPCLK[i][j] = 0.0;
3965 v->DISPCLK_DPPCLK_Support[i][j] = true;
3966 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3967 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3968 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3969 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3970 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3971 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3972 }
3973 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3974 * (1 + v->DISPCLKRampingMargin / 100.0);
3975 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3976 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3977 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3978 }
3979 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3980 * (1 + v->DISPCLKRampingMargin / 100.0);
3981 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3982 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3983 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3984 }
3985
3986 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3987 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3988 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3989 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3990 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3991 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3992 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3993 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3994 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3995 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3996 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3997 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3998 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3999 } else {
4000 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4001 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4002 }
4003 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
4004 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4005 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
4006 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4007 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4008 } else {
4009 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4010 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4011 }
4012 }
4013 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
4014 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4015 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
4016 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4017 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4018 } else {
4019 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4020 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4021 }
4022 }
4023 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4024 v->MPCCombine[i][j][k] = false;
4025 v->NoOfDPP[i][j][k] = 4;
4026 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4027 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4028 v->MPCCombine[i][j][k] = false;
4029 v->NoOfDPP[i][j][k] = 2;
4030 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4031 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4032 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
4033 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4034 v->MPCCombine[i][j][k] = false;
4035 v->NoOfDPP[i][j][k] = 1;
4036 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4037 } else {
4038 v->MPCCombine[i][j][k] = true;
4039 v->NoOfDPP[i][j][k] = 2;
4040 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4041 }
4042 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4043 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4044 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4045 v->DISPCLK_DPPCLK_Support[i][j] = false;
4046 }
4047 }
4048 v->TotalNumberOfActiveDPP[i][j] = 0;
4049 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4050 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4051 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4052 if (v->NoOfDPP[i][j][k] == 1)
4053 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4054 }
4055 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
4056 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4057 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4058 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4059 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4060 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4061 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4062 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4063 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4064 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4065 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4066 }
4067 }
4068 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4069 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4070 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4071 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4072 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4073 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4074 }
4075 }
4076 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4077 v->RequiredDISPCLK[i][j] = 0.0;
4078 v->DISPCLK_DPPCLK_Support[i][j] = true;
4079 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4080 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4081 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4082 v->MPCCombine[i][j][k] = true;
4083 v->NoOfDPP[i][j][k] = 2;
4084 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4085 } else {
4086 v->MPCCombine[i][j][k] = false;
4087 v->NoOfDPP[i][j][k] = 1;
4088 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4089 }
4090 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4091 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4092 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4093 } else {
4094 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4095 }
4096 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4097 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4098 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4099 v->DISPCLK_DPPCLK_Support[i][j] = false;
4100 }
4101 }
4102 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4103 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4104 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4105 }
4106 }
4107 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4108 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4109 v->DISPCLK_DPPCLK_Support[i][j] = false;
4110 }
4111 }
4112 }
4113
4114 /*Total Available Pipes Support Check*/
4115
4116 for (i = 0; i < v->soc.num_states; i++) {
4117 for (j = 0; j < 2; j++) {
4118 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4119 v->TotalAvailablePipesSupport[i][j] = true;
4120 } else {
4121 v->TotalAvailablePipesSupport[i][j] = false;
4122 }
4123 }
4124 }
4125 /*Display IO and DSC Support Check*/
4126
4127 v->NonsupportedDSCInputBPC = false;
4128 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4129 if (!(v->DSCInputBitPerComponent[k] == 12.0
4130 || v->DSCInputBitPerComponent[k] == 10.0
4131 || v->DSCInputBitPerComponent[k] == 8.0)) {
4132 v->NonsupportedDSCInputBPC = true;
4133 }
4134 }
4135
4136 /*Number Of DSC Slices*/
4137 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4138 if (v->BlendingAndTiming[k] == k) {
4139 if (v->PixelClockBackEnd[k] > 3200) {
4140 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4141 } else if (v->PixelClockBackEnd[k] > 1360) {
4142 v->NumberOfDSCSlices[k] = 8;
4143 } else if (v->PixelClockBackEnd[k] > 680) {
4144 v->NumberOfDSCSlices[k] = 4;
4145 } else if (v->PixelClockBackEnd[k] > 340) {
4146 v->NumberOfDSCSlices[k] = 2;
4147 } else {
4148 v->NumberOfDSCSlices[k] = 1;
4149 }
4150 } else {
4151 v->NumberOfDSCSlices[k] = 0;
4152 }
4153 }
4154
4155 for (i = 0; i < v->soc.num_states; i++) {
4156 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4157 v->RequiresDSC[i][k] = false;
4158 v->RequiresFEC[i][k] = false;
4159 if (v->BlendingAndTiming[k] == k) {
4160 if (v->Output[k] == dm_hdmi) {
4161 v->RequiresDSC[i][k] = false;
4162 v->RequiresFEC[i][k] = false;
4163 v->OutputBppPerState[i][k] = TruncToValidBPP(
4164 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4165 3,
4166 v->HTotal[k],
4167 v->HActive[k],
4168 v->PixelClockBackEnd[k],
4169 v->ForcedOutputLinkBPP[k],
4170 false,
4171 v->Output[k],
4172 v->OutputFormat[k],
4173 v->DSCInputBitPerComponent[k],
4174 v->NumberOfDSCSlices[k],
4175 v->AudioSampleRate[k],
4176 v->AudioSampleLayout[k],
4177 v->ODMCombineEnablePerState[i][k]);
4178 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4179 if (v->DSCEnable[k] == true) {
4180 v->RequiresDSC[i][k] = true;
4181 v->LinkDSCEnable = true;
4182 if (v->Output[k] == dm_dp) {
4183 v->RequiresFEC[i][k] = true;
4184 } else {
4185 v->RequiresFEC[i][k] = false;
4186 }
4187 } else {
4188 v->RequiresDSC[i][k] = false;
4189 v->LinkDSCEnable = false;
4190 v->RequiresFEC[i][k] = false;
4191 }
4192
4193 v->Outbpp = BPP_INVALID;
4194 if (v->PHYCLKPerState[i] >= 270.0) {
4195 v->Outbpp = TruncToValidBPP(
4196 (1.0 - v->Downspreading / 100.0) * 2700,
4197 v->OutputLinkDPLanes[k],
4198 v->HTotal[k],
4199 v->HActive[k],
4200 v->PixelClockBackEnd[k],
4201 v->ForcedOutputLinkBPP[k],
4202 v->LinkDSCEnable,
4203 v->Output[k],
4204 v->OutputFormat[k],
4205 v->DSCInputBitPerComponent[k],
4206 v->NumberOfDSCSlices[k],
4207 v->AudioSampleRate[k],
4208 v->AudioSampleLayout[k],
4209 v->ODMCombineEnablePerState[i][k]);
4210 v->OutputBppPerState[i][k] = v->Outbpp;
4211 // TODO: Need some other way to handle this nonsense
4212 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4213 }
4214 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4215 v->Outbpp = TruncToValidBPP(
4216 (1.0 - v->Downspreading / 100.0) * 5400,
4217 v->OutputLinkDPLanes[k],
4218 v->HTotal[k],
4219 v->HActive[k],
4220 v->PixelClockBackEnd[k],
4221 v->ForcedOutputLinkBPP[k],
4222 v->LinkDSCEnable,
4223 v->Output[k],
4224 v->OutputFormat[k],
4225 v->DSCInputBitPerComponent[k],
4226 v->NumberOfDSCSlices[k],
4227 v->AudioSampleRate[k],
4228 v->AudioSampleLayout[k],
4229 v->ODMCombineEnablePerState[i][k]);
4230 v->OutputBppPerState[i][k] = v->Outbpp;
4231 // TODO: Need some other way to handle this nonsense
4232 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4233 }
4234 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4235 v->Outbpp = TruncToValidBPP(
4236 (1.0 - v->Downspreading / 100.0) * 8100,
4237 v->OutputLinkDPLanes[k],
4238 v->HTotal[k],
4239 v->HActive[k],
4240 v->PixelClockBackEnd[k],
4241 v->ForcedOutputLinkBPP[k],
4242 v->LinkDSCEnable,
4243 v->Output[k],
4244 v->OutputFormat[k],
4245 v->DSCInputBitPerComponent[k],
4246 v->NumberOfDSCSlices[k],
4247 v->AudioSampleRate[k],
4248 v->AudioSampleLayout[k],
4249 v->ODMCombineEnablePerState[i][k]);
4250 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4251 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4252 v->RequiresDSC[i][k] = true;
4253 v->LinkDSCEnable = true;
4254 if (v->Output[k] == dm_dp) {
4255 v->RequiresFEC[i][k] = true;
4256 }
4257 v->Outbpp = TruncToValidBPP(
4258 (1.0 - v->Downspreading / 100.0) * 8100,
4259 v->OutputLinkDPLanes[k],
4260 v->HTotal[k],
4261 v->HActive[k],
4262 v->PixelClockBackEnd[k],
4263 v->ForcedOutputLinkBPP[k],
4264 v->LinkDSCEnable,
4265 v->Output[k],
4266 v->OutputFormat[k],
4267 v->DSCInputBitPerComponent[k],
4268 v->NumberOfDSCSlices[k],
4269 v->AudioSampleRate[k],
4270 v->AudioSampleLayout[k],
4271 v->ODMCombineEnablePerState[i][k]);
4272 }
4273 v->OutputBppPerState[i][k] = v->Outbpp;
4274 // TODO: Need some other way to handle this nonsense
4275 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4276 }
4277 }
4278 } else {
4279 v->OutputBppPerState[i][k] = 0;
4280 }
4281 }
4282 }
4283 for (i = 0; i < v->soc.num_states; i++) {
4284 v->DIOSupport[i] = true;
4285 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4286 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4287 && (v->OutputBppPerState[i][k] == 0
4288 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4289 v->DIOSupport[i] = false;
4290 }
4291 }
4292 }
4293
4294 for (i = 0; i < v->soc.num_states; ++i) {
4295 v->ODMCombine4To1SupportCheckOK[i] = true;
4296 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4297 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4298 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4299 v->ODMCombine4To1SupportCheckOK[i] = false;
4300 }
4301 }
4302 }
4303
4304 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4305
4306 for (i = 0; i < v->soc.num_states; i++) {
4307 v->NotEnoughDSCUnits[i] = false;
4308 v->TotalDSCUnitsRequired = 0.0;
4309 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4310 if (v->RequiresDSC[i][k] == true) {
4311 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4312 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4313 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4314 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4315 } else {
4316 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4317 }
4318 }
4319 }
4320 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4321 v->NotEnoughDSCUnits[i] = true;
4322 }
4323 }
4324 /*DSC Delay per state*/
4325
4326 for (i = 0; i < v->soc.num_states; i++) {
4327 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4328 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4329 v->BPP = 0.0;
4330 } else {
4331 v->BPP = v->OutputBppPerState[i][k];
4332 }
4333 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4334 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4335 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4336 v->DSCInputBitPerComponent[k],
4337 v->BPP,
4338 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4339 v->NumberOfDSCSlices[k],
4340 v->OutputFormat[k],
4341 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4342 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4343 v->DSCDelayPerState[i][k] = 2.0
4344 * dscceComputeDelay(
4345 v->DSCInputBitPerComponent[k],
4346 v->BPP,
4347 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4348 v->NumberOfDSCSlices[k] / 2,
4349 v->OutputFormat[k],
4350 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4351 } else {
4352 v->DSCDelayPerState[i][k] = 4.0
4353 * (dscceComputeDelay(
4354 v->DSCInputBitPerComponent[k],
4355 v->BPP,
4356 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4357 v->NumberOfDSCSlices[k] / 4,
4358 v->OutputFormat[k],
4359 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4360 }
4361 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4362 } else {
4363 v->DSCDelayPerState[i][k] = 0.0;
4364 }
4365 }
4366 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4367 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4368 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4369 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4370 }
4371 }
4372 }
4373 }
4374
4375 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4376 //
4377 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4378 for (j = 0; j <= 1; ++j) {
4379 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4380 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4381 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4382 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4383 }
4384
4385 CalculateSwathAndDETConfiguration(
4386 false,
4387 v->NumberOfActivePlanes,
4388 v->DETBufferSizeInKByte[0],
4389 v->MaximumSwathWidthLuma,
4390 v->MaximumSwathWidthChroma,
4391 v->SourceScan,
4392 v->SourcePixelFormat,
4393 v->SurfaceTiling,
4394 v->ViewportWidth,
4395 v->ViewportHeight,
4396 v->SurfaceWidthY,
4397 v->SurfaceWidthC,
4398 v->SurfaceHeightY,
4399 v->SurfaceHeightC,
4400 v->Read256BlockHeightY,
4401 v->Read256BlockHeightC,
4402 v->Read256BlockWidthY,
4403 v->Read256BlockWidthC,
4404 v->ODMCombineEnableThisState,
4405 v->BlendingAndTiming,
4406 v->BytePerPixelY,
4407 v->BytePerPixelC,
4408 v->BytePerPixelInDETY,
4409 v->BytePerPixelInDETC,
4410 v->HActive,
4411 v->HRatio,
4412 v->HRatioChroma,
4413 v->NoOfDPPThisState,
4414 v->swath_width_luma_ub_this_state,
4415 v->swath_width_chroma_ub_this_state,
4416 v->SwathWidthYThisState,
4417 v->SwathWidthCThisState,
4418 v->SwathHeightYThisState,
4419 v->SwathHeightCThisState,
4420 v->DETBufferSizeYThisState,
4421 v->DETBufferSizeCThisState,
4422 v->dummystring,
4423 &v->ViewportSizeSupport[i][j]);
4424
4425 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4426 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4427 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4428 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4429 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4430 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4431 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4432 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4433 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4434 }
4435
4436 }
4437 }
4438 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4439 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4440 }
4441
4442 for (i = 0; i < v->soc.num_states; i++) {
4443 for (j = 0; j < 2; j++) {
4444 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4445 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4446 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4447 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4448 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4449 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4450 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4451 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4452 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4453 }
4454
4455 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4456 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4457 if (v->DCCEnable[k] == true) {
4458 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4459 }
4460 }
4461
4462 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4463 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4464 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4465
4466 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4467 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4468 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4469 } else {
4470 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4471 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4472 }
4473
4474 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4475 mode_lib,
4476 v->DCCEnable[k],
4477 v->Read256BlockHeightC[k],
4478 v->Read256BlockWidthY[k],
4479 v->SourcePixelFormat[k],
4480 v->SurfaceTiling[k],
4481 v->BytePerPixelC[k],
4482 v->SourceScan[k],
4483 v->SwathWidthCThisState[k],
4484 v->ViewportHeightChroma[k],
4485 v->GPUVMEnable,
4486 v->HostVMEnable,
4487 v->HostVMMaxNonCachedPageTableLevels,
4488 v->GPUVMMinPageSize,
4489 v->HostVMMinPageSize,
4490 v->PTEBufferSizeInRequestsForChroma,
4491 v->PitchC[k],
4492 0.0,
4493 &v->MacroTileWidthC[k],
4494 &v->MetaRowBytesC,
4495 &v->DPTEBytesPerRowC,
4496 &v->PTEBufferSizeNotExceededC[i][j][k],
4497 &v->dummyinteger7,
4498 &v->dpte_row_height_chroma[k],
4499 &v->dummyinteger28,
4500 &v->dummyinteger26,
4501 &v->dummyinteger23,
4502 &v->meta_row_height_chroma[k],
4503 &v->dummyinteger8,
4504 &v->dummyinteger9,
4505 &v->dummyinteger19,
4506 &v->dummyinteger20,
4507 &v->dummyinteger17,
4508 &v->dummyinteger10,
4509 &v->dummyinteger11);
4510
4511 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4512 mode_lib,
4513 v->VRatioChroma[k],
4514 v->VTAPsChroma[k],
4515 v->Interlace[k],
4516 v->ProgressiveToInterlaceUnitInOPP,
4517 v->SwathHeightCThisState[k],
4518 v->ViewportYStartC[k],
4519 &v->PrefillC[k],
4520 &v->MaxNumSwC[k]);
4521 } else {
4522 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4523 v->PTEBufferSizeInRequestsForChroma = 0;
4524 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4525 v->MetaRowBytesC = 0.0;
4526 v->DPTEBytesPerRowC = 0.0;
4527 v->PrefetchLinesC[i][j][k] = 0.0;
4528 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4529 }
4530 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4531 mode_lib,
4532 v->DCCEnable[k],
4533 v->Read256BlockHeightY[k],
4534 v->Read256BlockWidthY[k],
4535 v->SourcePixelFormat[k],
4536 v->SurfaceTiling[k],
4537 v->BytePerPixelY[k],
4538 v->SourceScan[k],
4539 v->SwathWidthYThisState[k],
4540 v->ViewportHeight[k],
4541 v->GPUVMEnable,
4542 v->HostVMEnable,
4543 v->HostVMMaxNonCachedPageTableLevels,
4544 v->GPUVMMinPageSize,
4545 v->HostVMMinPageSize,
4546 v->PTEBufferSizeInRequestsForLuma,
4547 v->PitchY[k],
4548 v->DCCMetaPitchY[k],
4549 &v->MacroTileWidthY[k],
4550 &v->MetaRowBytesY,
4551 &v->DPTEBytesPerRowY,
4552 &v->PTEBufferSizeNotExceededY[i][j][k],
4553 v->dummyinteger4,
4554 &v->dpte_row_height[k],
4555 &v->dummyinteger29,
4556 &v->dummyinteger27,
4557 &v->dummyinteger24,
4558 &v->meta_row_height[k],
4559 &v->dummyinteger25,
4560 &v->dpte_group_bytes[k],
4561 &v->dummyinteger21,
4562 &v->dummyinteger22,
4563 &v->dummyinteger18,
4564 &v->dummyinteger5,
4565 &v->dummyinteger6);
4566 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4567 mode_lib,
4568 v->VRatio[k],
4569 v->vtaps[k],
4570 v->Interlace[k],
4571 v->ProgressiveToInterlaceUnitInOPP,
4572 v->SwathHeightYThisState[k],
4573 v->ViewportYStartY[k],
4574 &v->PrefillY[k],
4575 &v->MaxNumSwY[k]);
4576 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4577 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4578 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4579
4580 CalculateRowBandwidth(
4581 v->GPUVMEnable,
4582 v->SourcePixelFormat[k],
4583 v->VRatio[k],
4584 v->VRatioChroma[k],
4585 v->DCCEnable[k],
4586 v->HTotal[k] / v->PixelClock[k],
4587 v->MetaRowBytesY,
4588 v->MetaRowBytesC,
4589 v->meta_row_height[k],
4590 v->meta_row_height_chroma[k],
4591 v->DPTEBytesPerRowY,
4592 v->DPTEBytesPerRowC,
4593 v->dpte_row_height[k],
4594 v->dpte_row_height_chroma[k],
4595 &v->meta_row_bandwidth[i][j][k],
4596 &v->dpte_row_bandwidth[i][j][k]);
4597 }
4598 v->UrgLatency[i] = CalculateUrgentLatency(
4599 v->UrgentLatencyPixelDataOnly,
4600 v->UrgentLatencyPixelMixedWithVMData,
4601 v->UrgentLatencyVMDataOnly,
4602 v->DoUrgentLatencyAdjustment,
4603 v->UrgentLatencyAdjustmentFabricClockComponent,
4604 v->UrgentLatencyAdjustmentFabricClockReference,
4605 v->FabricClockPerState[i]);
4606
4607 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4608 CalculateUrgentBurstFactor(
4609 v->swath_width_luma_ub_this_state[k],
4610 v->swath_width_chroma_ub_this_state[k],
4611 v->DETBufferSizeInKByte[0],
4612 v->SwathHeightYThisState[k],
4613 v->SwathHeightCThisState[k],
4614 v->HTotal[k] / v->PixelClock[k],
4615 v->UrgLatency[i],
4616 v->CursorBufferSize,
4617 v->CursorWidth[k][0],
4618 v->CursorBPP[k][0],
4619 v->VRatio[k],
4620 v->VRatioChroma[k],
4621 v->BytePerPixelInDETY[k],
4622 v->BytePerPixelInDETC[k],
4623 v->DETBufferSizeYThisState[k],
4624 v->DETBufferSizeCThisState[k],
4625 &v->UrgentBurstFactorCursor[k],
4626 &v->UrgentBurstFactorLuma[k],
4627 &v->UrgentBurstFactorChroma[k],
4628 &NotUrgentLatencyHiding[k]);
4629 }
4630
4631 v->NotUrgentLatencyHiding[i][j] = false;
4632 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4633 if (NotUrgentLatencyHiding[k]) {
4634 v->NotUrgentLatencyHiding[i][j] = true;
4635 }
4636 }
4637
4638 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4639 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4640 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4641 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4642 }
4643
4644 v->TotalVActivePixelBandwidth[i][j] = 0;
4645 v->TotalVActiveCursorBandwidth[i][j] = 0;
4646 v->TotalMetaRowBandwidth[i][j] = 0;
4647 v->TotalDPTERowBandwidth[i][j] = 0;
4648 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4649 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4650 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4651 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4652 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4653 }
4654
4655 CalculateDCFCLKDeepSleep(
4656 mode_lib,
4657 v->NumberOfActivePlanes,
4658 v->BytePerPixelY,
4659 v->BytePerPixelC,
4660 v->VRatio,
4661 v->VRatioChroma,
4662 v->SwathWidthYThisState,
4663 v->SwathWidthCThisState,
4664 v->NoOfDPPThisState,
4665 v->HRatio,
4666 v->HRatioChroma,
4667 v->PixelClock,
4668 v->PSCL_FACTOR,
4669 v->PSCL_FACTOR_CHROMA,
4670 v->RequiredDPPCLKThisState,
4671 v->ReadBandwidthLuma,
4672 v->ReadBandwidthChroma,
4673 v->ReturnBusWidth,
4674 &v->ProjectedDCFCLKDeepSleep[i][j]);
4675 }
4676 }
4677
4678 //Calculate Return BW
4679
4680 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4681 for (j = 0; j <= 1; ++j) {
4682 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4683 if (v->BlendingAndTiming[k] == k) {
4684 if (v->WritebackEnable[k] == true) {
4685 v->WritebackDelayTime[k] = v->WritebackLatency
4686 + CalculateWriteBackDelay(
4687 v->WritebackPixelFormat[k],
4688 v->WritebackHRatio[k],
4689 v->WritebackVRatio[k],
4690 v->WritebackVTaps[k],
4691 v->WritebackDestinationWidth[k],
4692 v->WritebackDestinationHeight[k],
4693 v->WritebackSourceHeight[k],
4694 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4695 } else {
4696 v->WritebackDelayTime[k] = 0.0;
4697 }
4698 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4699 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4700 v->WritebackDelayTime[k] = dml_max(
4701 v->WritebackDelayTime[k],
4702 v->WritebackLatency
4703 + CalculateWriteBackDelay(
4704 v->WritebackPixelFormat[m],
4705 v->WritebackHRatio[m],
4706 v->WritebackVRatio[m],
4707 v->WritebackVTaps[m],
4708 v->WritebackDestinationWidth[m],
4709 v->WritebackDestinationHeight[m],
4710 v->WritebackSourceHeight[m],
4711 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4712 }
4713 }
4714 }
4715 }
4716 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4717 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4718 if (v->BlendingAndTiming[k] == m) {
4719 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4720 }
4721 }
4722 }
4723 v->MaxMaxVStartup[i][j] = 0;
4724 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4725 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4726 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4727 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4728 }
4729 }
4730 }
4731
4732 ReorderingBytes = v->NumberOfChannels
4733 * dml_max3(
4734 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4735 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4736 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4737 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4738
4739 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4740 for (j = 0; j <= 1; ++j) {
4741 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4742 }
4743 }
4744
4745 if (v->UseMinimumRequiredDCFCLK == true) {
4746 UseMinimumDCFCLK(
4747 mode_lib,
4748 v->MaxInterDCNTileRepeaters,
4749 MaxPrefetchMode,
4750 v->FinalDRAMClockChangeLatency,
4751 v->SREnterPlusExitTime,
4752 v->ReturnBusWidth,
4753 v->RoundTripPingLatencyCycles,
4754 ReorderingBytes,
4755 v->PixelChunkSizeInKByte,
4756 v->MetaChunkSize,
4757 v->GPUVMEnable,
4758 v->GPUVMMaxPageTableLevels,
4759 v->HostVMEnable,
4760 v->NumberOfActivePlanes,
4761 v->HostVMMinPageSize,
4762 v->HostVMMaxNonCachedPageTableLevels,
4763 v->DynamicMetadataVMEnabled,
4764 v->ImmediateFlipRequirement[0],
4765 v->ProgressiveToInterlaceUnitInOPP,
4766 v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
4767 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4768 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4769 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
4770 v->VTotal,
4771 v->VActive,
4772 v->DynamicMetadataTransmittedBytes,
4773 v->DynamicMetadataLinesBeforeActiveRequired,
4774 v->Interlace,
4775 v->RequiredDPPCLK,
4776 v->RequiredDISPCLK,
4777 v->UrgLatency,
4778 v->NoOfDPP,
4779 v->ProjectedDCFCLKDeepSleep,
4780 v->MaximumVStartup,
4781 v->TotalVActivePixelBandwidth,
4782 v->TotalVActiveCursorBandwidth,
4783 v->TotalMetaRowBandwidth,
4784 v->TotalDPTERowBandwidth,
4785 v->TotalNumberOfActiveDPP,
4786 v->TotalNumberOfDCCActiveDPP,
4787 v->dpte_group_bytes,
4788 v->PrefetchLinesY,
4789 v->PrefetchLinesC,
4790 v->swath_width_luma_ub_all_states,
4791 v->swath_width_chroma_ub_all_states,
4792 v->BytePerPixelY,
4793 v->BytePerPixelC,
4794 v->HTotal,
4795 v->PixelClock,
4796 v->PDEAndMetaPTEBytesPerFrame,
4797 v->DPTEBytesPerRow,
4798 v->MetaRowBytes,
4799 v->DynamicMetadataEnable,
4800 v->VActivePixelBandwidth,
4801 v->VActiveCursorBandwidth,
4802 v->ReadBandwidthLuma,
4803 v->ReadBandwidthChroma,
4804 v->DCFCLKPerState,
4805 v->DCFCLKState);
4806
4807 if (v->ClampMinDCFCLK) {
4808 /* Clamp calculated values to actual minimum */
4809 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4810 for (j = 0; j <= 1; ++j) {
4811 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4812 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4813 }
4814 }
4815 }
4816 }
4817 }
4818
4819 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4820 for (j = 0; j <= 1; ++j) {
4821 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4822 v->ReturnBusWidth * v->DCFCLKState[i][j],
4823 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4824 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4825 if (v->HostVMEnable != true) {
4826 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4827 / 100;
4828 } else {
4829 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4830 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4831 }
4832 }
4833 }
4834
4835 //Re-ordering Buffer Support Check
4836
4837 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4838 for (j = 0; j <= 1; ++j) {
4839 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4840 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4841 v->ROBSupport[i][j] = true;
4842 } else {
4843 v->ROBSupport[i][j] = false;
4844 }
4845 }
4846 }
4847
4848 //Vertical Active BW support check
4849
4850 MaxTotalVActiveRDBandwidth = 0;
4851 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4852 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4853 }
4854
4855 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4856 for (j = 0; j <= 1; ++j) {
4857 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4858 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4859 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4860 / 100);
4861 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4862 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4863 } else {
4864 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4865 }
4866 }
4867 }
4868
4869 //Prefetch Check
4870
4871 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4872 for (j = 0; j <= 1; ++j) {
4873 int NextPrefetchModeState = MinPrefetchMode;
4874
4875 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4876
4877 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4878 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4879 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4880 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4881 }
4882
4883 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4884 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4885 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4886 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4887 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4888 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4889 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4890 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4891 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4892 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4893 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4894 }
4895
4896 v->ExtraLatency = CalculateExtraLatency(
4897 v->RoundTripPingLatencyCycles,
4898 ReorderingBytes,
4899 v->DCFCLKState[i][j],
4900 v->TotalNumberOfActiveDPP[i][j],
4901 v->PixelChunkSizeInKByte,
4902 v->TotalNumberOfDCCActiveDPP[i][j],
4903 v->MetaChunkSize,
4904 v->ReturnBWPerState[i][j],
4905 v->GPUVMEnable,
4906 v->HostVMEnable,
4907 v->NumberOfActivePlanes,
4908 v->NoOfDPPThisState,
4909 v->dpte_group_bytes,
4910 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4911 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4912 v->HostVMMinPageSize,
4913 v->HostVMMaxNonCachedPageTableLevels);
4914
4915 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4916 do {
4917 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4918 v->MaxVStartup = v->NextMaxVStartup;
4919
4920 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4921
4922 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4923 Pipe myPipe = { 0 };
4924
4925 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4926 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4927 myPipe.PixelClock = v->PixelClock[k];
4928 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4929 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4930 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4931 myPipe.SourceScan = v->SourceScan[k];
4932 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4933 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4934 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4935 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4936 myPipe.InterlaceEnable = v->Interlace[k];
4937 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4938 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4939 myPipe.HTotal = v->HTotal[k];
4940 myPipe.DCCEnable = v->DCCEnable[k];
4941 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4942
4943 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4944 mode_lib,
4945 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4946 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4947 &myPipe,
4948 v->DSCDelayPerState[i][k],
4949 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4950 v->DPPCLKDelaySCL,
4951 v->DPPCLKDelaySCLLBOnly,
4952 v->DPPCLKDelayCNVCCursor,
4953 v->DISPCLKDelaySubtotal,
4954 v->SwathWidthYThisState[k] / v->HRatio[k],
4955 v->OutputFormat[k],
4956 v->MaxInterDCNTileRepeaters,
4957 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4958 v->MaximumVStartup[i][j][k],
4959 v->GPUVMMaxPageTableLevels,
4960 v->GPUVMEnable,
4961 v->HostVMEnable,
4962 v->HostVMMaxNonCachedPageTableLevels,
4963 v->HostVMMinPageSize,
4964 v->DynamicMetadataEnable[k],
4965 v->DynamicMetadataVMEnabled,
4966 v->DynamicMetadataLinesBeforeActiveRequired[k],
4967 v->DynamicMetadataTransmittedBytes[k],
4968 v->UrgLatency[i],
4969 v->ExtraLatency,
4970 v->TimeCalc,
4971 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4972 v->MetaRowBytes[i][j][k],
4973 v->DPTEBytesPerRow[i][j][k],
4974 v->PrefetchLinesY[i][j][k],
4975 v->SwathWidthYThisState[k],
4976 v->BytePerPixelY[k],
4977 v->PrefillY[k],
4978 v->MaxNumSwY[k],
4979 v->PrefetchLinesC[i][j][k],
4980 v->SwathWidthCThisState[k],
4981 v->BytePerPixelC[k],
4982 v->PrefillC[k],
4983 v->MaxNumSwC[k],
4984 v->swath_width_luma_ub_this_state[k],
4985 v->swath_width_chroma_ub_this_state[k],
4986 v->SwathHeightYThisState[k],
4987 v->SwathHeightCThisState[k],
4988 v->TWait,
4989 v->ProgressiveToInterlaceUnitInOPP,
4990 &v->DSTXAfterScaler[k],
4991 &v->DSTYAfterScaler[k],
4992 &v->LineTimesForPrefetch[k],
4993 &v->PrefetchBW[k],
4994 &v->LinesForMetaPTE[k],
4995 &v->LinesForMetaAndDPTERow[k],
4996 &v->VRatioPreY[i][j][k],
4997 &v->VRatioPreC[i][j][k],
4998 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4999 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
5000 &v->NoTimeForDynamicMetadata[i][j][k],
5001 &v->Tno_bw[k],
5002 &v->prefetch_vmrow_bw[k],
5003 &v->Tdmdl_vm[k],
5004 &v->Tdmdl[k],
5005 &v->VUpdateOffsetPix[k],
5006 &v->VUpdateWidthPix[k],
5007 &v->VReadyOffsetPix[k]);
5008 }
5009
5010 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5011 CalculateUrgentBurstFactor(
5012 v->swath_width_luma_ub_this_state[k],
5013 v->swath_width_chroma_ub_this_state[k],
5014 v->DETBufferSizeInKByte[0],
5015 v->SwathHeightYThisState[k],
5016 v->SwathHeightCThisState[k],
5017 v->HTotal[k] / v->PixelClock[k],
5018 v->UrgentLatency,
5019 v->CursorBufferSize,
5020 v->CursorWidth[k][0],
5021 v->CursorBPP[k][0],
5022 v->VRatioPreY[i][j][k],
5023 v->VRatioPreC[i][j][k],
5024 v->BytePerPixelInDETY[k],
5025 v->BytePerPixelInDETC[k],
5026 v->DETBufferSizeYThisState[k],
5027 v->DETBufferSizeCThisState[k],
5028 &v->UrgentBurstFactorCursorPre[k],
5029 &v->UrgentBurstFactorLumaPre[k],
5030 &v->UrgentBurstFactorChroma[k],
5031 &v->NoUrgentLatencyHidingPre[k]);
5032 }
5033
5034 v->MaximumReadBandwidthWithPrefetch = 0.0;
5035 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5036 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
5037 * v->VRatioPreY[i][j][k];
5038
5039 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
5040 + dml_max4(
5041 v->VActivePixelBandwidth[i][j][k],
5042 v->VActiveCursorBandwidth[i][j][k]
5043 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
5044 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5045 v->NoOfDPP[i][j][k]
5046 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5047 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5048 * v->UrgentBurstFactorChromaPre[k])
5049 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5050 }
5051
5052 v->NotEnoughUrgentLatencyHidingPre = false;
5053 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5054 if (v->NoUrgentLatencyHidingPre[k] == true) {
5055 v->NotEnoughUrgentLatencyHidingPre = true;
5056 }
5057 }
5058
5059 v->PrefetchSupported[i][j] = true;
5060 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5061 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5062 v->PrefetchSupported[i][j] = false;
5063 }
5064 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5065 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5066 || v->NoTimeForPrefetch[i][j][k] == true) {
5067 v->PrefetchSupported[i][j] = false;
5068 }
5069 }
5070
5071 v->DynamicMetadataSupported[i][j] = true;
5072 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5073 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5074 v->DynamicMetadataSupported[i][j] = false;
5075 }
5076 }
5077
5078 v->VRatioInPrefetchSupported[i][j] = true;
5079 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5080 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5081 v->VRatioInPrefetchSupported[i][j] = false;
5082 }
5083 }
5084 v->AnyLinesForVMOrRowTooLarge = false;
5085 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5086 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5087 v->AnyLinesForVMOrRowTooLarge = true;
5088 }
5089 }
5090
5091 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5092 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5093 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5094 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5095 - dml_max(
5096 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5097 v->NoOfDPP[i][j][k]
5098 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5099 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5100 * v->UrgentBurstFactorChromaPre[k])
5101 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5102 }
5103 v->TotImmediateFlipBytes = 0.0;
5104 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5105 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k]
5106 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k];
5107 }
5108
5109 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5110 CalculateFlipSchedule(
5111 mode_lib,
5112 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
5113 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
5114 v->ExtraLatency,
5115 v->UrgLatency[i],
5116 v->GPUVMMaxPageTableLevels,
5117 v->HostVMEnable,
5118 v->HostVMMaxNonCachedPageTableLevels,
5119 v->GPUVMEnable,
5120 v->HostVMMinPageSize,
5121 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5122 v->MetaRowBytes[i][j][k],
5123 v->DPTEBytesPerRow[i][j][k],
5124 v->BandwidthAvailableForImmediateFlip,
5125 v->TotImmediateFlipBytes,
5126 v->SourcePixelFormat[k],
5127 v->HTotal[k] / v->PixelClock[k],
5128 v->VRatio[k],
5129 v->VRatioChroma[k],
5130 v->Tno_bw[k],
5131 v->DCCEnable[k],
5132 v->dpte_row_height[k],
5133 v->meta_row_height[k],
5134 v->dpte_row_height_chroma[k],
5135 v->meta_row_height_chroma[k],
5136 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5137 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5138 &v->final_flip_bw[k],
5139 &v->ImmediateFlipSupportedForPipe[k]);
5140 }
5141 v->total_dcn_read_bw_with_flip = 0.0;
5142 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5143 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5144 + dml_max3(
5145 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5146 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5147 + v->VActiveCursorBandwidth[i][j][k],
5148 v->NoOfDPP[i][j][k]
5149 * (v->final_flip_bw[k]
5150 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5151 * v->UrgentBurstFactorLumaPre[k]
5152 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5153 * v->UrgentBurstFactorChromaPre[k])
5154 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5155 }
5156 v->ImmediateFlipSupportedForState[i][j] = true;
5157 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5158 v->ImmediateFlipSupportedForState[i][j] = false;
5159 }
5160 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5161 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5162 v->ImmediateFlipSupportedForState[i][j] = false;
5163 }
5164 }
5165 } else {
5166 v->ImmediateFlipSupportedForState[i][j] = false;
5167 }
5168 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
5169 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5170 NextPrefetchModeState = NextPrefetchModeState + 1;
5171 } else {
5172 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5173 }
5174 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5175 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5176 || v->ImmediateFlipSupportedForState[i][j] == true))
5177 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5178
5179 CalculateWatermarksAndDRAMSpeedChangeSupport(
5180 mode_lib,
5181 v->PrefetchModePerState[i][j],
5182 v->NumberOfActivePlanes,
5183 v->MaxLineBufferLines,
5184 v->LineBufferSize,
5185 v->DPPOutputBufferPixels,
5186 v->DETBufferSizeInKByte[0],
5187 v->WritebackInterfaceBufferSize,
5188 v->DCFCLKState[i][j],
5189 v->ReturnBWPerState[i][j],
5190 v->GPUVMEnable,
5191 v->dpte_group_bytes,
5192 v->MetaChunkSize,
5193 v->UrgLatency[i],
5194 v->ExtraLatency,
5195 v->WritebackLatency,
5196 v->WritebackChunkSize,
5197 v->SOCCLKPerState[i],
5198 v->FinalDRAMClockChangeLatency,
5199 v->SRExitTime,
5200 v->SREnterPlusExitTime,
5201 v->ProjectedDCFCLKDeepSleep[i][j],
5202 v->NoOfDPPThisState,
5203 v->DCCEnable,
5204 v->RequiredDPPCLKThisState,
5205 v->DETBufferSizeYThisState,
5206 v->DETBufferSizeCThisState,
5207 v->SwathHeightYThisState,
5208 v->SwathHeightCThisState,
5209 v->LBBitPerPixel,
5210 v->SwathWidthYThisState,
5211 v->SwathWidthCThisState,
5212 v->HRatio,
5213 v->HRatioChroma,
5214 v->vtaps,
5215 v->VTAPsChroma,
5216 v->VRatio,
5217 v->VRatioChroma,
5218 v->HTotal,
5219 v->PixelClock,
5220 v->BlendingAndTiming,
5221 v->BytePerPixelInDETY,
5222 v->BytePerPixelInDETC,
5223 v->DSTXAfterScaler,
5224 v->DSTYAfterScaler,
5225 v->WritebackEnable,
5226 v->WritebackPixelFormat,
5227 v->WritebackDestinationWidth,
5228 v->WritebackDestinationHeight,
5229 v->WritebackSourceHeight,
5230 &v->DRAMClockChangeSupport[i][j],
5231 &v->UrgentWatermark,
5232 &v->WritebackUrgentWatermark,
5233 &v->DRAMClockChangeWatermark,
5234 &v->WritebackDRAMClockChangeWatermark,
5235 &v->StutterExitWatermark,
5236 &v->StutterEnterPlusExitWatermark,
5237 &v->MinActiveDRAMClockChangeLatencySupported);
5238 }
5239 }
5240
5241 /*PTE Buffer Size Check*/
5242
5243 for (i = 0; i < v->soc.num_states; i++) {
5244 for (j = 0; j < 2; j++) {
5245 v->PTEBufferSizeNotExceeded[i][j] = true;
5246 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5247 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5248 v->PTEBufferSizeNotExceeded[i][j] = false;
5249 }
5250 }
5251 }
5252 }
5253 /*Cursor Support Check*/
5254
5255 v->CursorSupport = true;
5256 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5257 if (v->CursorWidth[k][0] > 0.0) {
5258 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5259 v->CursorSupport = false;
5260 }
5261 }
5262 }
5263 /*Valid Pitch Check*/
5264
5265 v->PitchSupport = true;
5266 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5267 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5268 if (v->DCCEnable[k] == true) {
5269 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5270 } else {
5271 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5272 }
5273 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5274 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5275 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5276 if (v->DCCEnable[k] == true) {
5277 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5278 } else {
5279 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5280 }
5281 } else {
5282 v->AlignedCPitch[k] = v->PitchC[k];
5283 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5284 }
5285 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5286 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5287 v->PitchSupport = false;
5288 }
5289 }
5290
5291 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5292 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5293 ViewportExceedsSurface = true;
5294
5295 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5296 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5297 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5298 ViewportExceedsSurface = true;
5299 }
5300 }
5301 }
5302 /*Mode Support, Voltage State and SOC Configuration*/
5303
5304 for (i = v->soc.num_states - 1; i >= 0; i--) {
5305 for (j = 0; j < 2; j++) {
5306 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5307 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5308 && v->NotEnoughDSCUnits[i] == 0
5309 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5310 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5311 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5312 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5313 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5314 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5315 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5316 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5317 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5318 v->ModeSupport[i][j] = true;
5319 } else {
5320 v->ModeSupport[i][j] = false;
5321 }
5322 }
5323 }
5324 {
5325 unsigned int MaximumMPCCombine = 0;
5326 for (i = v->soc.num_states; i >= 0; i--) {
5327 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5328 v->VoltageLevel = i;
5329 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5330 if (v->ModeSupport[i][1] == true) {
5331 MaximumMPCCombine = 1;
5332 } else {
5333 MaximumMPCCombine = 0;
5334 }
5335 }
5336 }
5337 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5338 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5339 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5340 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5341 }
5342 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5343 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5344 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5345 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5346 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5347 v->maxMpcComb = MaximumMPCCombine;
5348 }
5349 }
5350
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,unsigned int DETBufferSizeInKByte,unsigned int WritebackInterfaceBufferSize,double DCFCLK,double ReturnBW,bool GPUVMEnable,unsigned int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,unsigned int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int LBBitPerPixel[],double SwathWidthY[],double SwathWidthC[],double HRatio[],double HRatioChroma[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],double VRatioChroma[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5351 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5352 struct display_mode_lib *mode_lib,
5353 unsigned int PrefetchMode,
5354 unsigned int NumberOfActivePlanes,
5355 unsigned int MaxLineBufferLines,
5356 unsigned int LineBufferSize,
5357 unsigned int DPPOutputBufferPixels,
5358 unsigned int DETBufferSizeInKByte,
5359 unsigned int WritebackInterfaceBufferSize,
5360 double DCFCLK,
5361 double ReturnBW,
5362 bool GPUVMEnable,
5363 unsigned int dpte_group_bytes[],
5364 unsigned int MetaChunkSize,
5365 double UrgentLatency,
5366 double ExtraLatency,
5367 double WritebackLatency,
5368 double WritebackChunkSize,
5369 double SOCCLK,
5370 double DRAMClockChangeLatency,
5371 double SRExitTime,
5372 double SREnterPlusExitTime,
5373 double DCFCLKDeepSleep,
5374 unsigned int DPPPerPlane[],
5375 bool DCCEnable[],
5376 double DPPCLK[],
5377 unsigned int DETBufferSizeY[],
5378 unsigned int DETBufferSizeC[],
5379 unsigned int SwathHeightY[],
5380 unsigned int SwathHeightC[],
5381 unsigned int LBBitPerPixel[],
5382 double SwathWidthY[],
5383 double SwathWidthC[],
5384 double HRatio[],
5385 double HRatioChroma[],
5386 unsigned int vtaps[],
5387 unsigned int VTAPsChroma[],
5388 double VRatio[],
5389 double VRatioChroma[],
5390 unsigned int HTotal[],
5391 double PixelClock[],
5392 unsigned int BlendingAndTiming[],
5393 double BytePerPixelDETY[],
5394 double BytePerPixelDETC[],
5395 double DSTXAfterScaler[],
5396 double DSTYAfterScaler[],
5397 bool WritebackEnable[],
5398 enum source_format_class WritebackPixelFormat[],
5399 double WritebackDestinationWidth[],
5400 double WritebackDestinationHeight[],
5401 double WritebackSourceHeight[],
5402 enum clock_change_support *DRAMClockChangeSupport,
5403 double *UrgentWatermark,
5404 double *WritebackUrgentWatermark,
5405 double *DRAMClockChangeWatermark,
5406 double *WritebackDRAMClockChangeWatermark,
5407 double *StutterExitWatermark,
5408 double *StutterEnterPlusExitWatermark,
5409 double *MinActiveDRAMClockChangeLatencySupported)
5410 {
5411 double EffectiveLBLatencyHidingY = 0;
5412 double EffectiveLBLatencyHidingC = 0;
5413 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5414 double LinesInDETC = 0;
5415 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5416 unsigned int LinesInDETCRoundedDownToSwath = 0;
5417 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5418 double FullDETBufferingTimeC = 0;
5419 double ActiveDRAMClockChangeLatencyMarginY = 0;
5420 double ActiveDRAMClockChangeLatencyMarginC = 0;
5421 double WritebackDRAMClockChangeLatencyMargin = 0;
5422 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5423 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5424 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5425 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5426 double WritebackDRAMClockChangeLatencyHiding = 0;
5427 unsigned int k, j;
5428
5429 mode_lib->vba.TotalActiveDPP = 0;
5430 mode_lib->vba.TotalDCCActiveDPP = 0;
5431 for (k = 0; k < NumberOfActivePlanes; ++k) {
5432 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5433 if (DCCEnable[k] == true) {
5434 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5435 }
5436 }
5437
5438 *UrgentWatermark = UrgentLatency + ExtraLatency;
5439
5440 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5441
5442 mode_lib->vba.TotalActiveWriteback = 0;
5443 for (k = 0; k < NumberOfActivePlanes; ++k) {
5444 if (WritebackEnable[k] == true) {
5445 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5446 }
5447 }
5448
5449 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5450 *WritebackUrgentWatermark = WritebackLatency;
5451 } else {
5452 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5453 }
5454
5455 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5456 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5457 } else {
5458 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5459 }
5460
5461 for (k = 0; k < NumberOfActivePlanes; ++k) {
5462
5463 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5464
5465 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
5466
5467 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5468
5469 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
5470
5471 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5472 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5473 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5474 if (BytePerPixelDETC[k] > 0) {
5475 LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5476 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5477 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5478 } else {
5479 LinesInDETC = 0;
5480 FullDETBufferingTimeC = 999999;
5481 }
5482
5483 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5484
5485 if (NumberOfActivePlanes > 1) {
5486 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5487 }
5488
5489 if (BytePerPixelDETC[k] > 0) {
5490 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5491
5492 if (NumberOfActivePlanes > 1) {
5493 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5494 }
5495 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5496 } else {
5497 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5498 }
5499
5500 if (WritebackEnable[k] == true) {
5501
5502 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5503 if (WritebackPixelFormat[k] == dm_444_64) {
5504 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5505 }
5506 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5507 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
5508 }
5509 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5510 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5511 }
5512 }
5513
5514 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5515 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5516 for (k = 0; k < NumberOfActivePlanes; ++k) {
5517 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5518 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5519 if (BlendingAndTiming[k] == k) {
5520 PlaneWithMinActiveDRAMClockChangeMargin = k;
5521 } else {
5522 for (j = 0; j < NumberOfActivePlanes; ++j) {
5523 if (BlendingAndTiming[k] == j) {
5524 PlaneWithMinActiveDRAMClockChangeMargin = j;
5525 }
5526 }
5527 }
5528 }
5529 }
5530
5531 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5532
5533 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5534 for (k = 0; k < NumberOfActivePlanes; ++k) {
5535 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5536 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5537 }
5538 }
5539
5540 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5541 for (k = 0; k < NumberOfActivePlanes; ++k) {
5542 if (BlendingAndTiming[k] == k) {
5543 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5544 }
5545 }
5546
5547 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5548 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5549 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5550 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5551 } else {
5552 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5553 }
5554
5555 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5556 for (k = 0; k < NumberOfActivePlanes; ++k) {
5557 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5558 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5559 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
5560 }
5561 }
5562
5563 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5564 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
5565
5566 }
5567
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,int BytePerPixelY[],int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],int ReturnBusWidth,double * DCFCLKDeepSleep)5568 static void CalculateDCFCLKDeepSleep(
5569 struct display_mode_lib *mode_lib,
5570 unsigned int NumberOfActivePlanes,
5571 int BytePerPixelY[],
5572 int BytePerPixelC[],
5573 double VRatio[],
5574 double VRatioChroma[],
5575 double SwathWidthY[],
5576 double SwathWidthC[],
5577 unsigned int DPPPerPlane[],
5578 double HRatio[],
5579 double HRatioChroma[],
5580 double PixelClock[],
5581 double PSCL_THROUGHPUT[],
5582 double PSCL_THROUGHPUT_CHROMA[],
5583 double DPPCLK[],
5584 double ReadBandwidthLuma[],
5585 double ReadBandwidthChroma[],
5586 int ReturnBusWidth,
5587 double *DCFCLKDeepSleep)
5588 {
5589 double DisplayPipeLineDeliveryTimeLuma = 0;
5590 double DisplayPipeLineDeliveryTimeChroma = 0;
5591 unsigned int k;
5592 double ReadBandwidth = 0.0;
5593
5594 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5595 for (k = 0; k < NumberOfActivePlanes; ++k) {
5596
5597 if (VRatio[k] <= 1) {
5598 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5599 } else {
5600 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5601 }
5602 if (BytePerPixelC[k] == 0) {
5603 DisplayPipeLineDeliveryTimeChroma = 0;
5604 } else {
5605 if (VRatioChroma[k] <= 1) {
5606 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5607 } else {
5608 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5609 }
5610 }
5611
5612 if (BytePerPixelC[k] > 0) {
5613 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5614 } else {
5615 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5616 }
5617 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5618
5619 }
5620
5621 for (k = 0; k < NumberOfActivePlanes; ++k) {
5622 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5623 }
5624
5625 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
5626
5627 for (k = 0; k < NumberOfActivePlanes; ++k) {
5628 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5629 }
5630 }
5631
/*
 * CalculateUrgentBurstFactor - urgent burst factors for the cursor, luma and
 * chroma buffers of a single plane.
 *
 * For each buffer, the time it covers is derived from the number of lines it
 * holds, LineTime and VRatio.  When that time minus UrgentLatency is <= 0 the
 * factor is set to 0 and *NotEnoughUrgentLatencyHiding is set; otherwise the
 * factor is time / (time - UrgentLatency).  When VRatio is not positive the
 * factor is 1.
 *
 * *NotEnoughUrgentLatencyHiding is always cleared on entry.
 * *UrgentBurstFactorLuma is always written.
 * *UrgentBurstFactorCursor is only written when CursorWidth > 0.
 * *UrgentBurstFactorChroma is only written when BytePerPixelInDETC > 0.
 *
 * NOTE(review): DETBufferSizeInKByte and VRatioC are accepted but never read
 * here; the chroma path scales by VRatio.  Confirm against the HW formula
 * that this is intended before changing it.
 */
static void CalculateUrgentBurstFactor(
		long swath_width_luma_ub,
		long swath_width_chroma_ub,
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	const bool vratio_positive = VRatio > 0;
	double luma_lines;
	double chroma_lines;

	*NotEnoughUrgentLatencyHiding = false;

	/* Cursor buffer */
	if (CursorWidth > 0) {
		/*
		 * 1 << floor(log2(buffer bytes / bytes per cursor line)), going
		 * by the dml helper names.
		 */
		unsigned int cursor_lines = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);

		if (!vratio_positive) {
			*UrgentBurstFactorCursor = 1;
		} else {
			double cursor_time = cursor_lines * LineTime / VRatio;

			if (cursor_time - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = cursor_time / (cursor_time - UrgentLatency);
			}
		}
	}

	/* Luma DET: only whole swaths (multiples of SwathHeightY) are counted. */
	luma_lines = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (!vratio_positive) {
		*UrgentBurstFactorLuma = 1;
	} else {
		double luma_time = dml_floor(luma_lines, SwathHeightY) * LineTime / VRatio;

		if (luma_time - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = luma_time / (luma_time - UrgentLatency);
		}
	}

	/* Chroma DET: skipped entirely when there is no chroma plane. */
	if (!(BytePerPixelInDETC > 0))
		return;

	chroma_lines = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
	if (!vratio_positive) {
		*UrgentBurstFactorChroma = 1;
	} else {
		double chroma_time = dml_floor(chroma_lines, SwathHeightC) * LineTime / VRatio;

		if (chroma_time - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorChroma = 0;
		} else {
			*UrgentBurstFactorChroma = chroma_time / (chroma_time - UrgentLatency);
		}
	}
}
5706
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioChroma[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],int BytePerPixelC[],enum scan_direction_class SourceScan[],unsigned int NumberOfCursors[],unsigned int CursorWidth[][2],unsigned int CursorBPP[][2],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[],double CursorRequestDeliveryTime[],double CursorRequestDeliveryTimePrefetch[])5707 static void CalculatePixelDeliveryTimes(
5708 unsigned int NumberOfActivePlanes,
5709 double VRatio[],
5710 double VRatioChroma[],
5711 double VRatioPrefetchY[],
5712 double VRatioPrefetchC[],
5713 unsigned int swath_width_luma_ub[],
5714 unsigned int swath_width_chroma_ub[],
5715 unsigned int DPPPerPlane[],
5716 double HRatio[],
5717 double HRatioChroma[],
5718 double PixelClock[],
5719 double PSCL_THROUGHPUT[],
5720 double PSCL_THROUGHPUT_CHROMA[],
5721 double DPPCLK[],
5722 int BytePerPixelC[],
5723 enum scan_direction_class SourceScan[],
5724 unsigned int NumberOfCursors[],
5725 unsigned int CursorWidth[][2],
5726 unsigned int CursorBPP[][2],
5727 unsigned int BlockWidth256BytesY[],
5728 unsigned int BlockHeight256BytesY[],
5729 unsigned int BlockWidth256BytesC[],
5730 unsigned int BlockHeight256BytesC[],
5731 double DisplayPipeLineDeliveryTimeLuma[],
5732 double DisplayPipeLineDeliveryTimeChroma[],
5733 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5734 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5735 double DisplayPipeRequestDeliveryTimeLuma[],
5736 double DisplayPipeRequestDeliveryTimeChroma[],
5737 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5738 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5739 double CursorRequestDeliveryTime[],
5740 double CursorRequestDeliveryTimePrefetch[])
5741 {
5742 double req_per_swath_ub = 0;
5743 unsigned int k;
5744
5745 for (k = 0; k < NumberOfActivePlanes; ++k) {
5746 if (VRatio[k] <= 1) {
5747 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5748 } else {
5749 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5750 }
5751
5752 if (BytePerPixelC[k] == 0) {
5753 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5754 } else {
5755 if (VRatioChroma[k] <= 1) {
5756 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5757 } else {
5758 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5759 }
5760 }
5761
5762 if (VRatioPrefetchY[k] <= 1) {
5763 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5764 } else {
5765 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5766 }
5767
5768 if (BytePerPixelC[k] == 0) {
5769 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5770 } else {
5771 if (VRatioPrefetchC[k] <= 1) {
5772 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5773 } else {
5774 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5775 }
5776 }
5777 }
5778
5779 for (k = 0; k < NumberOfActivePlanes; ++k) {
5780 if (SourceScan[k] != dm_vert) {
5781 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5782 } else {
5783 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5784 }
5785 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5786 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5787 if (BytePerPixelC[k] == 0) {
5788 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5789 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5790 } else {
5791 if (SourceScan[k] != dm_vert) {
5792 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5793 } else {
5794 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5795 }
5796 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5797 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5798 }
5799 }
5800
5801 for (k = 0; k < NumberOfActivePlanes; ++k) {
5802 int cursor_req_per_width = 0;
5803 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5804 if (NumberOfCursors[k] > 0) {
5805 if (VRatio[k] <= 1) {
5806 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5807 } else {
5808 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5809 }
5810 if (VRatioPrefetchY[k] <= 1) {
5811 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5812 } else {
5813 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5814 }
5815 } else {
5816 CursorRequestDeliveryTime[k] = 0;
5817 CursorRequestDeliveryTimePrefetch[k] = 0;
5818 }
5819 }
5820 }
5821
CalculateMetaAndPTETimes(int NumberOfActivePlanes,bool GPUVMEnable,int MetaChunkSize,int MinMetaChunkSizeBytes,int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],int BytePerPixelY[],int BytePerPixelC[],enum scan_direction_class SourceScan[],int dpte_row_height[],int dpte_row_height_chroma[],int meta_row_width[],int meta_row_width_chroma[],int meta_row_height[],int meta_row_height_chroma[],int meta_req_width[],int meta_req_width_chroma[],int meta_req_height[],int meta_req_height_chroma[],int dpte_group_bytes[],int PTERequestSizeY[],int PTERequestSizeC[],int PixelPTEReqWidthY[],int PixelPTEReqHeightY[],int PixelPTEReqWidthC[],int PixelPTEReqHeightC[],int dpte_row_width_luma_ub[],int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])5822 static void CalculateMetaAndPTETimes(
5823 int NumberOfActivePlanes,
5824 bool GPUVMEnable,
5825 int MetaChunkSize,
5826 int MinMetaChunkSizeBytes,
5827 int HTotal[],
5828 double VRatio[],
5829 double VRatioChroma[],
5830 double DestinationLinesToRequestRowInVBlank[],
5831 double DestinationLinesToRequestRowInImmediateFlip[],
5832 bool DCCEnable[],
5833 double PixelClock[],
5834 int BytePerPixelY[],
5835 int BytePerPixelC[],
5836 enum scan_direction_class SourceScan[],
5837 int dpte_row_height[],
5838 int dpte_row_height_chroma[],
5839 int meta_row_width[],
5840 int meta_row_width_chroma[],
5841 int meta_row_height[],
5842 int meta_row_height_chroma[],
5843 int meta_req_width[],
5844 int meta_req_width_chroma[],
5845 int meta_req_height[],
5846 int meta_req_height_chroma[],
5847 int dpte_group_bytes[],
5848 int PTERequestSizeY[],
5849 int PTERequestSizeC[],
5850 int PixelPTEReqWidthY[],
5851 int PixelPTEReqHeightY[],
5852 int PixelPTEReqWidthC[],
5853 int PixelPTEReqHeightC[],
5854 int dpte_row_width_luma_ub[],
5855 int dpte_row_width_chroma_ub[],
5856 double DST_Y_PER_PTE_ROW_NOM_L[],
5857 double DST_Y_PER_PTE_ROW_NOM_C[],
5858 double DST_Y_PER_META_ROW_NOM_L[],
5859 double DST_Y_PER_META_ROW_NOM_C[],
5860 double TimePerMetaChunkNominal[],
5861 double TimePerChromaMetaChunkNominal[],
5862 double TimePerMetaChunkVBlank[],
5863 double TimePerChromaMetaChunkVBlank[],
5864 double TimePerMetaChunkFlip[],
5865 double TimePerChromaMetaChunkFlip[],
5866 double time_per_pte_group_nom_luma[],
5867 double time_per_pte_group_vblank_luma[],
5868 double time_per_pte_group_flip_luma[],
5869 double time_per_pte_group_nom_chroma[],
5870 double time_per_pte_group_vblank_chroma[],
5871 double time_per_pte_group_flip_chroma[])
5872 {
5873 unsigned int meta_chunk_width = 0;
5874 unsigned int min_meta_chunk_width = 0;
5875 unsigned int meta_chunk_per_row_int = 0;
5876 unsigned int meta_row_remainder = 0;
5877 unsigned int meta_chunk_threshold = 0;
5878 unsigned int meta_chunks_per_row_ub = 0;
5879 unsigned int meta_chunk_width_chroma = 0;
5880 unsigned int min_meta_chunk_width_chroma = 0;
5881 unsigned int meta_chunk_per_row_int_chroma = 0;
5882 unsigned int meta_row_remainder_chroma = 0;
5883 unsigned int meta_chunk_threshold_chroma = 0;
5884 unsigned int meta_chunks_per_row_ub_chroma = 0;
5885 unsigned int dpte_group_width_luma = 0;
5886 unsigned int dpte_groups_per_row_luma_ub = 0;
5887 unsigned int dpte_group_width_chroma = 0;
5888 unsigned int dpte_groups_per_row_chroma_ub = 0;
5889 unsigned int k;
5890
5891 for (k = 0; k < NumberOfActivePlanes; ++k) {
5892 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5893 if (BytePerPixelC[k] == 0) {
5894 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5895 } else {
5896 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5897 }
5898 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5899 if (BytePerPixelC[k] == 0) {
5900 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5901 } else {
5902 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
5903 }
5904 }
5905
5906 for (k = 0; k < NumberOfActivePlanes; ++k) {
5907 if (DCCEnable[k] == true) {
5908 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5909 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5910 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5911 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5912 if (SourceScan[k] != dm_vert) {
5913 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5914 } else {
5915 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
5916 }
5917 if (meta_row_remainder <= meta_chunk_threshold) {
5918 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5919 } else {
5920 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5921 }
5922 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5923 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5924 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5925 if (BytePerPixelC[k] == 0) {
5926 TimePerChromaMetaChunkNominal[k] = 0;
5927 TimePerChromaMetaChunkVBlank[k] = 0;
5928 TimePerChromaMetaChunkFlip[k] = 0;
5929 } else {
5930 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5931 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5932 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5933 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5934 if (SourceScan[k] != dm_vert) {
5935 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5936 } else {
5937 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5938 }
5939 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5940 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5941 } else {
5942 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5943 }
5944 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5945 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5946 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5947 }
5948 } else {
5949 TimePerMetaChunkNominal[k] = 0;
5950 TimePerMetaChunkVBlank[k] = 0;
5951 TimePerMetaChunkFlip[k] = 0;
5952 TimePerChromaMetaChunkNominal[k] = 0;
5953 TimePerChromaMetaChunkVBlank[k] = 0;
5954 TimePerChromaMetaChunkFlip[k] = 0;
5955 }
5956 }
5957
5958 for (k = 0; k < NumberOfActivePlanes; ++k) {
5959 if (GPUVMEnable == true) {
5960 if (SourceScan[k] != dm_vert) {
5961 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5962 } else {
5963 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5964 }
5965 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5966 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5967 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5968 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5969 if (BytePerPixelC[k] == 0) {
5970 time_per_pte_group_nom_chroma[k] = 0;
5971 time_per_pte_group_vblank_chroma[k] = 0;
5972 time_per_pte_group_flip_chroma[k] = 0;
5973 } else {
5974 if (SourceScan[k] != dm_vert) {
5975 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5976 } else {
5977 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5978 }
5979 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5980 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5981 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5982 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5983 }
5984 } else {
5985 time_per_pte_group_nom_luma[k] = 0;
5986 time_per_pte_group_vblank_luma[k] = 0;
5987 time_per_pte_group_flip_luma[k] = 0;
5988 time_per_pte_group_nom_chroma[k] = 0;
5989 time_per_pte_group_vblank_chroma[k] = 0;
5990 time_per_pte_group_flip_chroma[k] = 0;
5991 }
5992 }
5993 }
5994
/*
 * CalculateVMGroupAndRequestTimes - per-plane time budget for each VM group
 * and each 64-byte VM request during vblank and immediate flip.
 *
 * A plane is processed only when GPUVMEnable is set and either DCC is enabled
 * for it or GPUVMMaxPageTableLevels > 1; otherwise all four outputs are 0.
 *
 * The number of groups / requests in the lower VM stage depends on the mode:
 *   - DCC disabled:               dpde0 bytes only
 *   - DCC enabled, 1 table level: meta PTE bytes only
 *   - DCC enabled, >1 levels:     dpde0 + meta PTE bytes, plus 1 extra group
 *                                 (2 when the plane has chroma)
 * Chroma byte counts are included only when BytePerPixelC[k] > 0.
 * Groups are byte counts divided by vm_group_bytes[k], rounded up per term;
 * requests are byte counts divided by 64 (integer division per term).
 *
 * Each output is lines * HTotal / PixelClock divided by the group or request
 * count, and is halved when GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not read here.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		bool has_chroma;
		int groups;
		int requests;

		if (!(GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;

		if (DCCEnable[k] == false) {
			/* Page directory entries only. */
			if (has_chroma) {
				groups = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			if (has_chroma) {
				groups = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				requests = meta_pte_bytes_per_frame_ub_l[k] / 64
						+ meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				requests = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* Both page directory entries and meta PTEs. */
			if (has_chroma) {
				groups = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64
						+ dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k]
						/ 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
				/ groups;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
				/ groups;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
				/ requests;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
				/ requests;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
6105
CalculateStutterEfficiency(int NumberOfActivePlanes,long ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,double SRExitTime,bool SynchronizedVBlank,int DPPPerPlane[],unsigned int DETBufferSizeY[],int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],int SwathHeightY[],int SwathHeightC[],double DCCRateLuma[],double DCCRateChroma[],int HTotal[],int VTotal[],double PixelClock[],double VRatio[],enum scan_direction_class SourceScan[],int BlockHeight256BytesY[],int BlockWidth256BytesY[],int BlockHeight256BytesC[],int BlockWidth256BytesC[],int DCCYMaxUncompressedBlock[],int DCCCMaxUncompressedBlock[],int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthPlaneLuma[],double ReadBandwidthPlaneChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,double * StutterPeriodOut)6106 static void CalculateStutterEfficiency(
6107 int NumberOfActivePlanes,
6108 long ROBBufferSizeInKByte,
6109 double TotalDataReadBandwidth,
6110 double DCFCLK,
6111 double ReturnBW,
6112 double SRExitTime,
6113 bool SynchronizedVBlank,
6114 int DPPPerPlane[],
6115 unsigned int DETBufferSizeY[],
6116 int BytePerPixelY[],
6117 double BytePerPixelDETY[],
6118 double SwathWidthY[],
6119 int SwathHeightY[],
6120 int SwathHeightC[],
6121 double DCCRateLuma[],
6122 double DCCRateChroma[],
6123 int HTotal[],
6124 int VTotal[],
6125 double PixelClock[],
6126 double VRatio[],
6127 enum scan_direction_class SourceScan[],
6128 int BlockHeight256BytesY[],
6129 int BlockWidth256BytesY[],
6130 int BlockHeight256BytesC[],
6131 int BlockWidth256BytesC[],
6132 int DCCYMaxUncompressedBlock[],
6133 int DCCCMaxUncompressedBlock[],
6134 int VActive[],
6135 bool DCCEnable[],
6136 bool WritebackEnable[],
6137 double ReadBandwidthPlaneLuma[],
6138 double ReadBandwidthPlaneChroma[],
6139 double meta_row_bw[],
6140 double dpte_row_bw[],
6141 double *StutterEfficiencyNotIncludingVBlank,
6142 double *StutterEfficiency,
6143 double *StutterPeriodOut)
6144 {
6145 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
6146 double FrameTimeForMinFullDETBufferingTime = 0;
6147 double StutterPeriod = 0;
6148 double AverageReadBandwidth = 0;
6149 double TotalRowReadBandwidth = 0;
6150 double AverageDCCCompressionRate = 0;
6151 double PartOfBurstThatFitsInROB = 0;
6152 double StutterBurstTime = 0;
6153 int TotalActiveWriteback = 0;
6154 double VBlankTime = 0;
6155 double SmallestVBlank = 0;
6156 int BytePerPixelYCriticalPlane = 0;
6157 double SwathWidthYCriticalPlane = 0;
6158 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
6159 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
6160 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6161 double MaximumEffectiveCompressionLuma = 0;
6162 double MaximumEffectiveCompressionChroma = 0;
6163 unsigned int k;
6164
6165 for (k = 0; k < NumberOfActivePlanes; ++k) {
6166 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
6167 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
6168 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
6169 }
6170
6171 StutterPeriod = FullDETBufferingTimeY[0];
6172 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6173 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6174 SwathWidthYCriticalPlane = SwathWidthY[0];
6175 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6176 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
6177
6178 for (k = 0; k < NumberOfActivePlanes; ++k) {
6179 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6180 StutterPeriod = FullDETBufferingTimeY[k];
6181 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6182 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6183 SwathWidthYCriticalPlane = SwathWidthY[k];
6184 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6185 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
6186 }
6187 }
6188
6189 AverageReadBandwidth = 0;
6190 TotalRowReadBandwidth = 0;
6191 for (k = 0; k < NumberOfActivePlanes; ++k) {
6192 if (DCCEnable[k] == true) {
6193 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6194 || (SourceScan[k] != dm_vert
6195 && BlockHeight256BytesY[k] > SwathHeightY[k])
6196 || DCCYMaxUncompressedBlock[k] < 256) {
6197 MaximumEffectiveCompressionLuma = 2;
6198 } else {
6199 MaximumEffectiveCompressionLuma = 4;
6200 }
6201 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
6202
6203 if (ReadBandwidthPlaneChroma[k] > 0) {
6204 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6205 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6206 || DCCCMaxUncompressedBlock[k] < 256) {
6207 MaximumEffectiveCompressionChroma = 2;
6208 } else {
6209 MaximumEffectiveCompressionChroma = 4;
6210 }
6211 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
6212 }
6213 } else {
6214 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6215 }
6216 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6217 }
6218
6219 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
6220 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6221 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6222 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6223 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6224
6225 TotalActiveWriteback = 0;
6226 for (k = 0; k < NumberOfActivePlanes; ++k) {
6227 if (WritebackEnable[k] == true) {
6228 TotalActiveWriteback = TotalActiveWriteback + 1;
6229 }
6230 }
6231
6232 if (TotalActiveWriteback == 0) {
6233 *StutterEfficiencyNotIncludingVBlank = (1
6234 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6235 } else {
6236 *StutterEfficiencyNotIncludingVBlank = 0;
6237 }
6238
6239 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6240 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
6241 } else {
6242 SmallestVBlank = 0;
6243 }
6244 for (k = 0; k < NumberOfActivePlanes; ++k) {
6245 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6246 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6247 } else {
6248 VBlankTime = 0;
6249 }
6250 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
6251 }
6252
6253 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
6254
6255 if (StutterPeriodOut)
6256 *StutterPeriodOut = StutterPeriod;
6257 }
6258
CalculateSwathAndDETConfiguration(bool ForceSingleDPP,int NumberOfActivePlanes,unsigned int DETBufferSizeInKByte,double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum scan_direction_class SourceScan[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],int ViewportWidth[],int ViewportHeight[],int SurfaceWidthY[],int SurfaceWidthC[],int SurfaceHeightY[],int SurfaceHeightC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],enum odm_combine_mode ODMCombineEnabled[],int BlendingAndTiming[],int BytePerPixY[],int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],int HActive[],double HRatio[],double HRatioChroma[],int DPPPerPlane[],int swath_width_luma_ub[],int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],int SwathHeightY[],int SwathHeightC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool ViewportSizeSupportPerPlane[],bool * ViewportSizeSupport)6259 static void CalculateSwathAndDETConfiguration(
6260 bool ForceSingleDPP,
6261 int NumberOfActivePlanes,
6262 unsigned int DETBufferSizeInKByte,
6263 double MaximumSwathWidthLuma[],
6264 double MaximumSwathWidthChroma[],
6265 enum scan_direction_class SourceScan[],
6266 enum source_format_class SourcePixelFormat[],
6267 enum dm_swizzle_mode SurfaceTiling[],
6268 int ViewportWidth[],
6269 int ViewportHeight[],
6270 int SurfaceWidthY[],
6271 int SurfaceWidthC[],
6272 int SurfaceHeightY[],
6273 int SurfaceHeightC[],
6274 int Read256BytesBlockHeightY[],
6275 int Read256BytesBlockHeightC[],
6276 int Read256BytesBlockWidthY[],
6277 int Read256BytesBlockWidthC[],
6278 enum odm_combine_mode ODMCombineEnabled[],
6279 int BlendingAndTiming[],
6280 int BytePerPixY[],
6281 int BytePerPixC[],
6282 double BytePerPixDETY[],
6283 double BytePerPixDETC[],
6284 int HActive[],
6285 double HRatio[],
6286 double HRatioChroma[],
6287 int DPPPerPlane[],
6288 int swath_width_luma_ub[],
6289 int swath_width_chroma_ub[],
6290 double SwathWidth[],
6291 double SwathWidthChroma[],
6292 int SwathHeightY[],
6293 int SwathHeightC[],
6294 unsigned int DETBufferSizeY[],
6295 unsigned int DETBufferSizeC[],
6296 bool ViewportSizeSupportPerPlane[],
6297 bool *ViewportSizeSupport)
6298 {
6299 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6300 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6301 int MinimumSwathHeightY = 0;
6302 int MinimumSwathHeightC = 0;
6303 long RoundedUpMaxSwathSizeBytesY = 0;
6304 long RoundedUpMaxSwathSizeBytesC = 0;
6305 long RoundedUpMinSwathSizeBytesY = 0;
6306 long RoundedUpMinSwathSizeBytesC = 0;
6307 long RoundedUpSwathSizeBytesY = 0;
6308 long RoundedUpSwathSizeBytesC = 0;
6309 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6310 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
6311 int k;
6312
6313 CalculateSwathWidth(
6314 ForceSingleDPP,
6315 NumberOfActivePlanes,
6316 SourcePixelFormat,
6317 SourceScan,
6318 ViewportWidth,
6319 ViewportHeight,
6320 SurfaceWidthY,
6321 SurfaceWidthC,
6322 SurfaceHeightY,
6323 SurfaceHeightC,
6324 ODMCombineEnabled,
6325 BytePerPixY,
6326 BytePerPixC,
6327 Read256BytesBlockHeightY,
6328 Read256BytesBlockHeightC,
6329 Read256BytesBlockWidthY,
6330 Read256BytesBlockWidthC,
6331 BlendingAndTiming,
6332 HActive,
6333 HRatio,
6334 DPPPerPlane,
6335 SwathWidthSingleDPP,
6336 SwathWidthSingleDPPChroma,
6337 SwathWidth,
6338 SwathWidthChroma,
6339 MaximumSwathHeightY,
6340 MaximumSwathHeightC,
6341 swath_width_luma_ub,
6342 swath_width_chroma_ub);
6343
6344 *ViewportSizeSupport = true;
6345 for (k = 0; k < NumberOfActivePlanes; ++k) {
6346 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6347 || SourcePixelFormat[k] == dm_444_16
6348 || SourcePixelFormat[k] == dm_mono_16
6349 || SourcePixelFormat[k] == dm_mono_8
6350 || SourcePixelFormat[k] == dm_rgbe)) {
6351 if (SurfaceTiling[k] == dm_sw_linear
6352 || (SourcePixelFormat[k] == dm_444_64
6353 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6354 && SourceScan[k] != dm_vert)) {
6355 MinimumSwathHeightY = MaximumSwathHeightY[k];
6356 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6357 MinimumSwathHeightY = MaximumSwathHeightY[k];
6358 } else {
6359 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6360 }
6361 MinimumSwathHeightC = MaximumSwathHeightC[k];
6362 } else {
6363 if (SurfaceTiling[k] == dm_sw_linear) {
6364 MinimumSwathHeightY = MaximumSwathHeightY[k];
6365 MinimumSwathHeightC = MaximumSwathHeightC[k];
6366 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6367 && SourceScan[k] == dm_vert) {
6368 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6369 MinimumSwathHeightC = MaximumSwathHeightC[k];
6370 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6371 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6372 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6373 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6374 MinimumSwathHeightY = MaximumSwathHeightY[k];
6375 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6376 } else {
6377 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6378 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6379 }
6380 }
6381
6382 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6383 * MaximumSwathHeightY[k];
6384 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6385 * MinimumSwathHeightY;
6386 if (SourcePixelFormat[k] == dm_420_10) {
6387 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6388 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6389 }
6390 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6391 * MaximumSwathHeightC[k];
6392 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6393 * MinimumSwathHeightC;
6394 if (SourcePixelFormat[k] == dm_420_10) {
6395 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6396 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6397 }
6398
6399 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6400 <= DETBufferSizeInKByte * 1024 / 2) {
6401 SwathHeightY[k] = MaximumSwathHeightY[k];
6402 SwathHeightC[k] = MaximumSwathHeightC[k];
6403 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6404 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6405 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6406 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6407 <= DETBufferSizeInKByte * 1024 / 2) {
6408 SwathHeightY[k] = MinimumSwathHeightY;
6409 SwathHeightC[k] = MaximumSwathHeightC[k];
6410 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6411 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6412 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6413 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6414 <= DETBufferSizeInKByte * 1024 / 2) {
6415 SwathHeightY[k] = MaximumSwathHeightY[k];
6416 SwathHeightC[k] = MinimumSwathHeightC;
6417 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6418 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6419 } else {
6420 SwathHeightY[k] = MinimumSwathHeightY;
6421 SwathHeightC[k] = MinimumSwathHeightC;
6422 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6423 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6424 }
6425
6426 if (SwathHeightC[k] == 0) {
6427 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6428 DETBufferSizeC[k] = 0;
6429 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6430 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6431 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6432 } else {
6433 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6434 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
6435 }
6436
6437 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6438 > DETBufferSizeInKByte * 1024 / 2
6439 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6440 || (SwathHeightC[k] > 0
6441 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6442 *ViewportSizeSupport = false;
6443 ViewportSizeSupportPerPlane[k] = false;
6444 } else {
6445 ViewportSizeSupportPerPlane[k] = true;
6446 }
6447 }
6448 }
6449
CalculateSwathWidth(bool ForceSingleDPP,int NumberOfActivePlanes,enum source_format_class SourcePixelFormat[],enum scan_direction_class SourceScan[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMCombineEnabled[],int BytePerPixY[],int BytePerPixC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],int BlendingAndTiming[],unsigned int HActive[],double HRatio[],int DPPPerPlane[],double SwathWidthSingleDPPY[],double SwathWidthSingleDPPC[],double SwathWidthY[],double SwathWidthC[],int MaximumSwathHeightY[],int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])6450 static void CalculateSwathWidth(
6451 bool ForceSingleDPP,
6452 int NumberOfActivePlanes,
6453 enum source_format_class SourcePixelFormat[],
6454 enum scan_direction_class SourceScan[],
6455 unsigned int ViewportWidth[],
6456 unsigned int ViewportHeight[],
6457 unsigned int SurfaceWidthY[],
6458 unsigned int SurfaceWidthC[],
6459 unsigned int SurfaceHeightY[],
6460 unsigned int SurfaceHeightC[],
6461 enum odm_combine_mode ODMCombineEnabled[],
6462 int BytePerPixY[],
6463 int BytePerPixC[],
6464 int Read256BytesBlockHeightY[],
6465 int Read256BytesBlockHeightC[],
6466 int Read256BytesBlockWidthY[],
6467 int Read256BytesBlockWidthC[],
6468 int BlendingAndTiming[],
6469 unsigned int HActive[],
6470 double HRatio[],
6471 int DPPPerPlane[],
6472 double SwathWidthSingleDPPY[],
6473 double SwathWidthSingleDPPC[],
6474 double SwathWidthY[],
6475 double SwathWidthC[],
6476 int MaximumSwathHeightY[],
6477 int MaximumSwathHeightC[],
6478 unsigned int swath_width_luma_ub[],
6479 unsigned int swath_width_chroma_ub[])
6480 {
6481 unsigned int k, j;
6482 long surface_width_ub_l;
6483 long surface_height_ub_l;
6484 long surface_width_ub_c;
6485 long surface_height_ub_c;
6486
6487 for (k = 0; k < NumberOfActivePlanes; ++k) {
6488 enum odm_combine_mode MainPlaneODMCombine = 0;
6489 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6490 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6491 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6492 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6493
6494 if (SourceScan[k] != dm_vert) {
6495 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6496 } else {
6497 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6498 }
6499
6500 MainPlaneODMCombine = ODMCombineEnabled[k];
6501 for (j = 0; j < NumberOfActivePlanes; ++j) {
6502 if (BlendingAndTiming[k] == j) {
6503 MainPlaneODMCombine = ODMCombineEnabled[j];
6504 }
6505 }
6506
6507 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6508 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6509 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6510 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6511 } else if (DPPPerPlane[k] == 2) {
6512 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6513 } else {
6514 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6515 }
6516
6517 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6518 SwathWidthC[k] = SwathWidthY[k] / 2;
6519 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6520 } else {
6521 SwathWidthC[k] = SwathWidthY[k];
6522 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6523 }
6524
6525 if (ForceSingleDPP == true) {
6526 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6527 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6528 }
6529
6530 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6531 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6532 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6533 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6534
6535 if (SourceScan[k] != dm_vert) {
6536 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6537 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6538 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6539 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6540 if (BytePerPixC[k] > 0) {
6541 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6542 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6543 } else {
6544 swath_width_chroma_ub[k] = 0;
6545 }
6546 } else {
6547 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6548 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6549 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6550 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6551 if (BytePerPixC[k] > 0) {
6552 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6553 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6554 } else {
6555 swath_width_chroma_ub[k] = 0;
6556 }
6557 }
6558 }
6559 }
6560
/*
 * Returns (RoundTripPingLatencyCycles + 32) / DCFCLK plus the value of
 * CalculateExtraLatencyBytes() divided by ReturnBW.
 *
 * Every parameter other than RoundTripPingLatencyCycles, DCFCLK and ReturnBW
 * is forwarded unchanged to CalculateExtraLatencyBytes().
 */
static double CalculateExtraLatency(
		long RoundTripPingLatencyCycles,
		long ReorderingBytes,
		double DCFCLK,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	const double BytesOfExtraLatency = CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActivePlanes,
			NumberOfDPP,
			dpte_group_bytes,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);

	/* Fixed round-trip term plus the time to return the extra bytes. */
	return (RoundTripPingLatencyCycles + 32) / DCFCLK + BytesOfExtraLatency / ReturnBW;
}
6599
/*
 * Returns the number of bytes that contribute to extra latency:
 *
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable is set, for each active plane:
 *       NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *       * HostVMInefficiencyFactor
 *
 * HostVMInefficiencyFactor and HostVMDynamicLevels are 1 and 0 unless both
 * GPUVMEnable and HostVMEnable are set. In that case the factor is the ratio
 * of the two PercentOfIdeal... arguments, and the level count is
 * HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (not below 0 for the
 * reduced cases) according to HostVMMinPageSize.
 */
static double CalculateExtraLatencyBytes(
		long ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	/* Defaults apply whenever GPUVM and HostVM are not both enabled. */
	double HostVMInefficiencyFactor = 1;
	int HostVMDynamicLevels = 0;
	int RequestsPerGroupByte;
	double TotalBytes;
	unsigned int Plane;

	if (GPUVMEnable && HostVMEnable) {
		HostVMInefficiencyFactor =
				PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
				/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;

		/* Page-size thresholds: below 2048, below 1048576, and everything else. */
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no per-plane page-table contribution. */
	if (!GPUVMEnable)
		return TotalBytes;

	/* Integer loop invariant; the floating-point factor is applied last, per plane. */
	RequestsPerGroupByte = 1 + 8 * HostVMDynamicLevels;
	for (Plane = 0; Plane < NumberOfActivePlanes; ++Plane)
		TotalBytes += NumberOfDPP[Plane] * dpte_group_bytes[Plane] * RequestsPerGroupByte * HostVMInefficiencyFactor;

	return TotalBytes;
}
6644
6645
/*
 * Returns the largest of the three urgent-latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is additionally offset by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstCaseLatency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstCaseLatency;

	/* The offset is zero when FabricClock equals the reference clock. */
	return WorstCaseLatency + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
6663
UseMinimumDCFCLK(struct display_mode_lib * mode_lib,int MaxInterDCNTileRepeaters,int MaxPrefetchMode,double FinalDRAMClockChangeLatency,double SREnterPlusExitTime,int ReturnBusWidth,int RoundTripPingLatencyCycles,int ReorderingBytes,int PixelChunkSizeInKByte,int MetaChunkSize,bool GPUVMEnable,int GPUVMMaxPageTableLevels,bool HostVMEnable,int NumberOfActivePlanes,double HostVMMinPageSize,int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,enum immediate_flip_requirement ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,int VTotal[],int VActive[],int DynamicMetadataTransmittedBytes[],int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFCLKDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],double TotalVActivePixelBandwidth[][2],double TotalVActiveCursorBandwidth[][2],double TotalMetaRowBandwidth[][2],double TotalDPTERowBandwidth[][2],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],int BytePerPixelY[],int BytePerPixelC[],int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],double 
VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double DCFCLKState[][2])6664 static noinline_for_stack void UseMinimumDCFCLK(
6665 struct display_mode_lib *mode_lib,
6666 int MaxInterDCNTileRepeaters,
6667 int MaxPrefetchMode,
6668 double FinalDRAMClockChangeLatency,
6669 double SREnterPlusExitTime,
6670 int ReturnBusWidth,
6671 int RoundTripPingLatencyCycles,
6672 int ReorderingBytes,
6673 int PixelChunkSizeInKByte,
6674 int MetaChunkSize,
6675 bool GPUVMEnable,
6676 int GPUVMMaxPageTableLevels,
6677 bool HostVMEnable,
6678 int NumberOfActivePlanes,
6679 double HostVMMinPageSize,
6680 int HostVMMaxNonCachedPageTableLevels,
6681 bool DynamicMetadataVMEnabled,
6682 enum immediate_flip_requirement ImmediateFlipRequirement,
6683 bool ProgressiveToInterlaceUnitInOPP,
6684 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
6685 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6686 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6687 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
6688 int VTotal[],
6689 int VActive[],
6690 int DynamicMetadataTransmittedBytes[],
6691 int DynamicMetadataLinesBeforeActiveRequired[],
6692 bool Interlace[],
6693 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
6694 double RequiredDISPCLK[][2],
6695 double UrgLatency[],
6696 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
6697 double ProjectedDCFCLKDeepSleep[][2],
6698 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
6699 double TotalVActivePixelBandwidth[][2],
6700 double TotalVActiveCursorBandwidth[][2],
6701 double TotalMetaRowBandwidth[][2],
6702 double TotalDPTERowBandwidth[][2],
6703 unsigned int TotalNumberOfActiveDPP[][2],
6704 unsigned int TotalNumberOfDCCActiveDPP[][2],
6705 int dpte_group_bytes[],
6706 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
6707 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
6708 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
6709 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
6710 int BytePerPixelY[],
6711 int BytePerPixelC[],
6712 int HTotal[],
6713 double PixelClock[],
6714 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
6715 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
6716 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
6717 bool DynamicMetadataEnable[],
6718 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
6719 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
6720 double ReadBandwidthLuma[],
6721 double ReadBandwidthChroma[],
6722 double DCFCLKPerState[],
6723 double DCFCLKState[][2])
6724 {
6725 double NormalEfficiency = 0;
6726 double PTEEfficiency = 0;
6727 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6728 unsigned int i, j, k;
6729
6730 NormalEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6731 : PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
6732 PTEEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6733 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
6734 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6735 for (j = 0; j <= 1; ++j) {
6736 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6737 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6738 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6739 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6740 double MinimumTWait = 0;
6741 double NonDPTEBandwidth = 0;
6742 double DPTEBandwidth = 0;
6743 double DCFCLKRequiredForAverageBandwidth = 0;
6744 double ExtraLatencyBytes = 0;
6745 double ExtraLatencyCycles = 0;
6746 double DCFCLKRequiredForPeakBandwidth = 0;
6747 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6748 double MinimumTvmPlus2Tr0 = 0;
6749
6750 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6751 for (k = 0; k < NumberOfActivePlanes; ++k) {
6752 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6753 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
6754 }
6755
6756 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6757 NoOfDPPState[k] = NoOfDPP[i][j][k];
6758 }
6759
6760 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
6761 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
6762 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
6763 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
6764 DCFCLKRequiredForAverageBandwidth = dml_max3(ProjectedDCFCLKDeepSleep[i][j],
6765 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth / (MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6766 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / ReturnBusWidth);
6767
6768 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP[i][j],
6769 MetaChunkSize, GPUVMEnable, HostVMEnable, NumberOfActivePlanes, NoOfDPPState, dpte_group_bytes,
6770 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6771 HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
6772 ExtraLatencyCycles = RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
6773 for (k = 0; k < NumberOfActivePlanes; ++k) {
6774 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6775 double ExpectedPrefetchBWAcceleration = { 0 };
6776 double PrefetchTime = { 0 };
6777
6778 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
6779 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
6780 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6781 / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * DPTEBytesPerRow[i][j][k] / PTEEfficiency
6782 / NormalEfficiency / ReturnBusWidth + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6783 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
6784 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
6785 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
6786 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
6787 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - UrgLatency[i] * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels
6788 : GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6789
6790 if (PrefetchTime > 0) {
6791 double ExpectedVRatioPrefetch = { 0 };
6792 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
6793 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6794 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6795 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
6796 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6797 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / ReturnBusWidth;
6798 }
6799 } else {
6800 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
6801 }
6802 if (DynamicMetadataEnable[k] == true) {
6803 double TsetupPipe = { 0 };
6804 double TdmbfPipe = { 0 };
6805 double TdmsksPipe = { 0 };
6806 double TdmecPipe = { 0 };
6807 double AllowedTimeForUrgentExtraLatency = { 0 };
6808
6809 CalculateDynamicMetadataParameters(
6810 MaxInterDCNTileRepeaters,
6811 RequiredDPPCLK[i][j][k],
6812 RequiredDISPCLK[i][j],
6813 ProjectedDCFCLKDeepSleep[i][j],
6814 PixelClock[k],
6815 HTotal[k],
6816 VTotal[k] - VActive[k],
6817 DynamicMetadataTransmittedBytes[k],
6818 DynamicMetadataLinesBeforeActiveRequired[k],
6819 Interlace[k],
6820 ProgressiveToInterlaceUnitInOPP,
6821 &TsetupPipe,
6822 &TdmbfPipe,
6823 &TdmecPipe,
6824 &TdmsksPipe);
6825 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TsetupPipe
6826 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6827 if (AllowedTimeForUrgentExtraLatency > 0) {
6828 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6829 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
6830 } else {
6831 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
6832 }
6833 }
6834 }
6835 DCFCLKRequiredForPeakBandwidth = 0;
6836 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6837 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
6838 }
6839 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? (HostVMEnable == true ?
6840 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
6841 for (k = 0; k < NumberOfActivePlanes; ++k) {
6842 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6843 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
6844 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6845 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
6846 } else {
6847 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6848 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6849 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
6850 }
6851 }
6852 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6853 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6854 }
6855 }
6856 }
6857
6858 #endif /* CONFIG_DRM_AMD_DC_DCN */
6859