diff options
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c')
-rw-r--r-- | drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 930 |
1 files changed, 426 insertions, 504 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 5a701d9df0f7..4b010b1b8aed 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -390,61 +390,8 @@ void dml32_CalculateBytePerPixelAndBlockSizes( #endif } // CalculateBytePerPixelAndBlockSizes -void dml32_CalculatedoublePipeDPPCLKAndSCLThroughput( - double HRatio, - double HRatioChroma, - double VRatio, - double VRatioChroma, - double MaxDCHUBToPSCLThroughput, - double MaxPSCLToLBThroughput, - double PixelClock, - enum source_format_class SourcePixelFormat, - unsigned int HTaps, - unsigned int HTapsChroma, - unsigned int VTaps, - unsigned int VTapsChroma, - - /* output */ - double *PSCL_THROUGHPUT, - double *PSCL_THROUGHPUT_CHROMA, - double *DPPCLKUsingdoubleDPP) -{ - double DPPCLKUsingdoubleDPPLuma; - double DPPCLKUsingdoubleDPPChroma; - - if (HRatio > 1) { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / - dml_ceil((double) HTaps / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - - DPPCLKUsingdoubleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / - *PSCL_THROUGHPUT, 1); - - if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingdoubleDPPLuma < 2 * PixelClock) - DPPCLKUsingdoubleDPPLuma = 2 * PixelClock; - - if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && - SourcePixelFormat != dm_rgbe_alpha)) { - *PSCL_THROUGHPUT_CHROMA = 0; - *DPPCLKUsingdoubleDPP = DPPCLKUsingdoubleDPPLuma; - } else { - if (HRatioChroma > 1) { - *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * - HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - DPPCLKUsingdoubleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), - HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); - if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingdoubleDPPChroma < 2 * PixelClock) - DPPCLKUsingdoubleDPPChroma = 2 * PixelClock; - *DPPCLKUsingdoubleDPP = dml_max(DPPCLKUsingdoubleDPPLuma, DPPCLKUsingdoubleDPPChroma); - } -} - void dml32_CalculateSwathAndDETConfiguration( + struct dml32_CalculateSwathAndDETConfiguration *st_vars, unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int ConfigReturnBufferSizeInKByte, @@ -454,6 +401,9 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int NumberOfActiveSurfaces, unsigned int nomDETInKByte, enum unbounded_requesting_policy UseUnboundedRequestingFinal, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + unsigned int PixelChunkSizeKBytes, + unsigned int ROBSizeKBytes, unsigned int CompressedBufferSegmentSizeInkByteFinal, enum output_encoder_class Output[], double ReadBandwidthLuma[], @@ -501,24 +451,20 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int DETBufferSizeC[], bool *UnboundedRequestEnabled, unsigned int *CompressedBufferSizeInkByte, + unsigned int *CompBufReservedSpaceKBytes, + bool *CompBufReservedSpaceNeedAdjustment, bool ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport) { - unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; - unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpSwathSizeBytesY; - unsigned int RoundedUpSwathSizeBytesC; - double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; - double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; unsigned int k; - unsigned int TotalActiveDPP = 0; - bool NoChromaSurfaces = true; - unsigned int DETBufferSizeInKByteForSwathCalculation; + + st_vars->TotalActiveDPP = 0; + st_vars->NoChromaSurfaces = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); + dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); + dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); #endif dml32_CalculateSwathWidth(ForceSingleDPP, NumberOfActiveSurfaces, @@ -548,48 +494,64 @@ void dml32_CalculateSwathAndDETConfiguration( DPPPerSurface, /* Output */ - SwathWidthdoubleDPP, - SwathWidthdoubleDPPChroma, + st_vars->SwathWidthdoubleDPP, + st_vars->SwathWidthdoubleDPPChroma, SwathWidth, SwathWidthChroma, - MaximumSwathHeightY, - MaximumSwathHeightC, + st_vars->MaximumSwathHeightY, + st_vars->MaximumSwathHeightC, swath_width_luma_ub, swath_width_chroma_ub); for (k = 0; k < NumberOfActiveSurfaces; ++k) { - RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; - RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; + st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k]; + st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, - RoundedUpMaxSwathSizeBytesY[k]); + st_vars->RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, - RoundedUpMaxSwathSizeBytesC[k]); + st_vars->RoundedUpMaxSwathSizeBytesC[k]); #endif if (SourcePixelFormat[k] == dm_420_10) { - RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); - RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); + st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256); + st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256); } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); + st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { - NoChromaSurfaces = false; + st_vars->NoChromaSurfaces = false; } } - *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, - NoChromaSurfaces, Output[0]); + // By default, just set the reserved space to 2 pixel chunks size + *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; + + // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data + // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] + // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req + *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512); + + if (*CompBufReservedSpaceNeedAdjustment == 1) { + *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512; + } + + #ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); + #endif + + *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); dml32_CalculateDETBufferSize(DETSizeOverride, UseMALLForPStateChange, @@ -604,8 +566,8 @@ void dml32_CalculateSwathAndDETConfiguration( SourcePixelFormat, ReadBandwidthLuma, ReadBandwidthChroma, - RoundedUpMaxSwathSizeBytesY, - RoundedUpMaxSwathSizeBytesC, + st_vars->RoundedUpMaxSwathSizeBytesY, + st_vars->RoundedUpMaxSwathSizeBytesC, DPPPerSurface, /* Output */ @@ -613,7 +575,7 @@ void dml32_CalculateSwathAndDETConfiguration( CompressedBufferSizeInkByte); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); + dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP); dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); @@ -624,42 +586,42 @@ void dml32_CalculateSwathAndDETConfiguration( *ViewportSizeSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == + st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, - DETBufferSizeInKByteForSwathCalculation); + st_vars->DETBufferSizeInKByteForSwathCalculation); #endif - if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k]; - SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && - RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && - RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k]; - SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; + } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && + st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; + } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && + st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; } else { - SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; } - if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) + if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 > + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { *ViewportSizeSupport = false; @@ -674,7 +636,7 @@ void dml32_CalculateSwathAndDETConfiguration( #endif DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; DETBufferSizeC[k] = 0; - } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { + } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) { #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); #endif @@ -692,11 +654,11 @@ void dml32_CalculateSwathAndDETConfiguration( dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, - k, RoundedUpMaxSwathSizeBytesY[k]); + k, st_vars->RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, - k, RoundedUpMaxSwathSizeBytesC[k]); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); + k, st_vars->RoundedUpMaxSwathSizeBytesC[k]); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC); dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); @@ -907,9 +869,12 @@ void dml32_CalculateSwathWidth( } // CalculateSwathWidth bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, - unsigned int TotalNumberOfActiveDPP, - bool NoChroma, - enum output_encoder_class Output) + unsigned int TotalNumberOfActiveDPP, + bool NoChroma, + enum output_encoder_class Output, + enum dm_swizzle_mode SurfaceTiling, + bool CompBufReservedSpaceNeedAdjustment, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) { bool ret_val = false; @@ -917,7 +882,20 @@ bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequest TotalNumberOfActiveDPP == 1 && NoChroma); if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) ret_val = false; - return ret_val; + + if (SurfaceTiling == dm_sw_linear) + ret_val = false; + + if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) + ret_val = false; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); +#endif + + return (ret_val); } void dml32_CalculateDETBufferSize( @@ -1686,17 +1664,22 @@ double dml32_RequiredDTBCLK( unsigned int AudioRate, unsigned int AudioLayout) { - double PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); - double HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * - dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); - double HCBlank = 64 + 32 * - dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); - double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; - double HActiveTribyteRate = PixelWordRate * HCActive / HActive; + double PixelWordRate; + double HCActive; + double HCBlank; + double AverageTribyteRate; + double HActiveTribyteRate; if (DSCEnable != true) return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); + HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * + dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + HCBlank = 64 + 32 * + dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); + AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + HActiveTribyteRate = PixelWordRate * HCActive / HActive; return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; } @@ -1884,6 +1867,7 @@ void dml32_CalculateSurfaceSizeInMall( } // CalculateSurfaceSizeInMall void dml32_CalculateVMRowAndSwath( + struct dml32_CalculateVMRowAndSwath *st_vars, unsigned int NumberOfActiveSurfaces, DmlPipe myPipe[], unsigned int SurfaceSizeInMALL[], @@ -1949,21 +1933,6 @@ void dml32_CalculateVMRowAndSwath( unsigned int BIGK_FRAGMENT_SIZE[]) { unsigned int k; - unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; - unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; - unsigned int PDEAndMetaPTEBytesFrameY; - unsigned int PDEAndMetaPTEBytesFrameC; - unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; - unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; - bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (HostVMEnable == true) { @@ -1985,15 +1954,15 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && !IsVertical(myPipe[k].SourceRotation)) { - PTEBufferSizeInRequestsForLuma[k] = + st_vars->PTEBufferSizeInRequestsForLuma[k] = (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; - PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; + st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k]; } else { - PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; - PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; + st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; + st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; } - PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( + st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -2013,21 +1982,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - PTEBufferSizeInRequestsForChroma[k], + st_vars->PTEBufferSizeInRequestsForChroma[k], myPipe[k].PitchC, myPipe[k].DCCMetaPitchC, myPipe[k].BlockWidthC, myPipe[k].BlockHeightC, /* Output */ - &MetaRowByteC[k], - &PixelPTEBytesPerRowC[k], + &st_vars->MetaRowByteC[k], + &st_vars->PixelPTEBytesPerRowC[k], &dpte_row_width_chroma_ub[k], &dpte_row_height_chroma[k], &dpte_row_height_linear_chroma[k], - &PixelPTEBytesPerRowC_one_row_per_frame[k], - &dpte_row_width_chroma_ub_one_row_per_frame[k], - &dpte_row_height_chroma_one_row_per_frame[k], + &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k], + &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k], + &st_vars->dpte_row_height_chroma_one_row_per_frame[k], &meta_req_width_chroma[k], &meta_req_height_chroma[k], &meta_row_width_chroma[k], @@ -2055,19 +2024,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillC[k], &MaxNumSwathC[k]); } else { - PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; - PTEBufferSizeInRequestsForChroma[k] = 0; - PixelPTEBytesPerRowC[k] = 0; - PDEAndMetaPTEBytesFrameC = 0; - MetaRowByteC[k] = 0; + st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; + st_vars->PTEBufferSizeInRequestsForChroma[k] = 0; + st_vars->PixelPTEBytesPerRowC[k] = 0; + st_vars->PDEAndMetaPTEBytesFrameC = 0; + st_vars->MetaRowByteC[k] = 0; MaxNumSwathC[k] = 0; PrefetchSourceLinesC[k] = 0; - dpte_row_height_chroma_one_row_per_frame[k] = 0; - dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; - PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0; + st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; } - PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( + st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -2087,21 +2056,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - PTEBufferSizeInRequestsForLuma[k], + st_vars->PTEBufferSizeInRequestsForLuma[k], myPipe[k].PitchY, myPipe[k].DCCMetaPitchY, myPipe[k].BlockWidthY, myPipe[k].BlockHeightY, /* Output */ - &MetaRowByteY[k], - &PixelPTEBytesPerRowY[k], + &st_vars->MetaRowByteY[k], + &st_vars->PixelPTEBytesPerRowY[k], &dpte_row_width_luma_ub[k], &dpte_row_height_luma[k], &dpte_row_height_linear_luma[k], - &PixelPTEBytesPerRowY_one_row_per_frame[k], - &dpte_row_width_luma_ub_one_row_per_frame[k], - &dpte_row_height_luma_one_row_per_frame[k], + &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k], + &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k], + &st_vars->dpte_row_height_luma_one_row_per_frame[k], &meta_req_width[k], &meta_req_height[k], &meta_row_width[k], @@ -2129,19 +2098,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillY[k], &MaxNumSwathY[k]); - PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; - MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; + PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC; + MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k]; - if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && - PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { + if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] && + st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) { PTEBufferSizeNotExceeded[k] = true; } else { PTEBufferSizeNotExceeded[k] = false; } - one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * - PTEBufferSizeInRequestsForLuma[k] && - PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); + st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * + st_vars->PTEBufferSizeInRequestsForLuma[k] && + st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]); } dml32_CalculateMALLUseForStaticScreen( @@ -2149,7 +2118,7 @@ void dml32_CalculateVMRowAndSwath( MALLAllocatedForDCN, UseMALLForStaticScreen, // mode SurfaceSizeInMALL, - one_row_per_frame_fits_in_buffer, + st_vars->one_row_per_frame_fits_in_buffer, /* Output */ UsesMALLForStaticScreen); // boolen @@ -2175,13 +2144,13 @@ void dml32_CalculateVMRowAndSwath( !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); if (use_one_row_for_frame[k]) { - dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; - dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; - PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; - dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; - dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; - PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; - PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; + dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k]; + dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k]; + st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k]; + dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k]; + dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k]; + st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k]; + PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k]; } if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) @@ -2189,7 +2158,7 @@ void dml32_CalculateVMRowAndSwath( else DCCMetaBufferSizeNotExceeded[k] = false; - PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; + PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k]; if (use_one_row_for_frame[k]) PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; @@ -2200,11 +2169,11 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].VRatioChroma, myPipe[k].DCCEnable, myPipe[k].HTotal / myPipe[k].PixelClock, - MetaRowByteY[k], MetaRowByteC[k], + st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k], meta_row_height[k], meta_row_height_chroma[k], - PixelPTEBytesPerRowY[k], - PixelPTEBytesPerRowC[k], + st_vars->PixelPTEBytesPerRowY[k], + st_vars->PixelPTEBytesPerRowC[k], dpte_row_height_luma[k], dpte_row_height_chroma[k], @@ -2220,12 +2189,12 @@ void dml32_CalculateVMRowAndSwath( dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", __func__, k, dpte_row_width_luma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]); dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", __func__, k, dpte_row_height_chroma[k]); dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", __func__, k, dpte_row_width_chroma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]); dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", __func__, k, PTEBufferSizeNotExceeded[k]); @@ -3373,6 +3342,7 @@ double dml32_CalculateExtraLatency( } // CalculateExtraLatency bool dml32_CalculatePrefetchSchedule( + struct dml32_CalculatePrefetchSchedule *st_vars, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, @@ -3436,45 +3406,18 @@ bool dml32_CalculatePrefetchSchedule( double *VReadyOffsetPix) { bool MyError = false; - unsigned int DPPCycles, DISPCLKCycles; - double DSTTotalPixelsAfterScaler; - double LineTime; - double dst_y_prefetch_equ; - double prefetch_bw_oto; - double Tvm_oto; - double Tr0_oto; - double Tvm_oto_lines; - double Tr0_oto_lines; - double dst_y_prefetch_oto; - double TimeForFetchingMetaPTE = 0; - double TimeForFetchingRowInVBlank = 0; - double LinesToRequestPrefetchPixelData = 0; - unsigned int HostVMDynamicLevelsTrips; - double trip_to_mem; - double Tvm_trips; - double Tr0_trips; - double Tvm_trips_rounded; - double Tr0_trips_rounded; - double Lsw_oto; - double Tpre_rounded; - double prefetch_bw_equ; - double Tvm_equ; - double Tr0_equ; - double Tdmbf; - double Tdmec; - double Tdmsks; - double prefetch_sw_bytes; - double bytes_pp; - double dep_bytes; - unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; - double min_Lsw; - double Tsw_est1 = 0; - double Tsw_est3 = 0; + + st_vars->TimeForFetchingMetaPTE = 0; + st_vars->TimeForFetchingRowInVBlank = 0; + st_vars->LinesToRequestPrefetchPixelData = 0; + st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__; + st_vars->Tsw_est1 = 0; + st_vars->Tsw_est3 = 0; if (GPUVMEnable == true && HostVMEnable == true) - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; else - HostVMDynamicLevelsTrips = 0; + st_vars->HostVMDynamicLevelsTrips = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); @@ -3497,19 +3440,19 @@ bool dml32_CalculatePrefetchSchedule( TSetup, /* output */ - &Tdmbf, - &Tdmec, - &Tdmsks, + &st_vars->Tdmbf, + &st_vars->Tdmec, + &st_vars->Tdmsks, VUpdateOffsetPix, VUpdateWidthPix, VReadyOffsetPix); - LineTime = myPipe->HTotal / myPipe->PixelClock; - trip_to_mem = UrgentLatency; - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock; + st_vars->trip_to_mem = UrgentLatency; + st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); if (DynamicMetadataVMEnabled == true) - *Tdmdl = TWait + Tvm_trips + trip_to_mem; + *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem; else *Tdmdl = TWait + UrgentExtraLatency; @@ -3519,15 +3462,15 @@ bool dml32_CalculatePrefetchSchedule( #endif if (DynamicMetadataEnable == true) { - if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { + if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", - __func__, Tdmbf); - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + __func__, st_vars->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", - __func__, Tdmsks); + __func__, st_vars->Tdmsks); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); #endif @@ -3539,21 +3482,21 @@ bool dml32_CalculatePrefetchSchedule( } *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && - GPUVMEnable == true ? TWait + Tvm_trips : 0); + GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0); if (myPipe->ScalerEnabled) - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; else - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; - DISPCLKCycles = DISPCLKDelaySubtotal; + st_vars->DISPCLKCycles = DISPCLKDelaySubtotal; if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) return true; - *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * + *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles * myPipe->PixelClock / myPipe->Dispclk + DSCDelay; *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) @@ -3563,10 +3506,10 @@ bool dml32_CalculatePrefetchSchedule( + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); + dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles); dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); - dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); + dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles); dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); @@ -3579,9 +3522,9 @@ bool dml32_CalculatePrefetchSchedule( else *DSTYAfterScaler = 0; - DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; - *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); - *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); + st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); @@ -3589,132 +3532,132 @@ bool dml32_CalculatePrefetchSchedule( MyError = false; - Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); + st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1); if (GPUVMEnable == true) { - Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; if (GPUVMPageTableLevels >= 3) { - *Tno_bw = UrgentExtraLatency + trip_to_mem * - (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); + *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem * + (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { - Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / - 4.0 * LineTime; // VBA_ERROR + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) / + 4.0 * st_vars->LineTime; // VBA_ERROR *Tno_bw = UrgentExtraLatency; } else { *Tno_bw = 0; } } else if (myPipe->DCCEnable == true) { - Tvm_trips_rounded = LineTime / 4.0; - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; *Tno_bw = 0; } else { - Tvm_trips_rounded = LineTime / 4.0; - Tr0_trips_rounded = LineTime / 2.0; + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; + st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0; *Tno_bw = 0; } - Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); - Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); + st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0); + st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0); if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) { - bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; } else { - bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; } - prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; - prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, - prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); + st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, + st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime)); - min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; - min_Lsw = dml_max(min_Lsw, 1.0); - Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; + st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre; + st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0); + st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0; if (GPUVMEnable == true) { - Tvm_oto = dml_max3( - Tvm_trips, - *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, - LineTime / 4.0); + st_vars->Tvm_oto = dml_max3( + st_vars->Tvm_trips, + *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto, + st_vars->LineTime / 4.0); } else - Tvm_oto = LineTime / 4.0; + st_vars->Tvm_oto = st_vars->LineTime / 4.0; if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - Tr0_oto = dml_max4( - Tr0_trips, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, - (LineTime - Tvm_oto)/2.0, - LineTime / 4.0); + st_vars->Tr0_oto = dml_max4( + st_vars->Tr0_trips, + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto, + (st_vars->LineTime - st_vars->Tvm_oto)/2.0, + st_vars->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); - dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); - dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips); + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto); + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4); #endif } else - Tr0_oto = (LineTime - Tvm_oto) / 2.0; + st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0; - Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; - Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; - dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; + st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0; + st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0; + st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto; - dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - + st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime - (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); - dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); + dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw); dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); - dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); + dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem); dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); - dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); - dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes); + dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp); dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); - dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); - dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); - dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); - dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); - dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); - dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); - dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); - dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); - dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); + dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips); + dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips); + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto); + dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto); + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines); + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines); + dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto); + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto); + dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ); #endif - dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; - Tpre_rounded = dst_y_prefetch_equ * LineTime; + st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0; + st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); - dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ); + dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", - __func__, VStartup * LineTime); + __func__, VStartup * st_vars->LineTime); dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); - dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", __func__, *DSTYAfterScaler); #endif - dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, + st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); - if (prefetch_sw_bytes < dep_bytes) - prefetch_sw_bytes = 2 * dep_bytes; + if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes) + st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes; *PrefetchBandwidth = 0; *DestinationLinesToRequestVMInVBlank = 0; @@ -3722,61 +3665,61 @@ bool dml32_CalculatePrefetchSchedule( *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; - if (dst_y_prefetch_equ > 1) { + if (st_vars->dst_y_prefetch_equ > 1) { double PrefetchBandwidth1; double PrefetchBandwidth2; double PrefetchBandwidth3; double PrefetchBandwidth4; - if (Tpre_rounded - *Tno_bw > 0) { + if (st_vars->Tpre_rounded - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); - Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw); + st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1; } else PrefetchBandwidth1 = 0; - if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) - && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { + if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw) + && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw); } - if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) - PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / - (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); + if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0) + PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) / + (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded); else PrefetchBandwidth2 = 0; - if (Tpre_rounded - Tvm_trips_rounded > 0) { + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); - Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded); + st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3; } else PrefetchBandwidth3 = 0; if (VStartup == MaxVStartup && - (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * - LineTime - Tvm_trips_rounded > 0) { + (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * + st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded); } - if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { - PrefetchBandwidth4 = prefetch_sw_bytes / - (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) { + PrefetchBandwidth4 = st_vars->prefetch_sw_bytes / + (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded); } else { PrefetchBandwidth4 = 0; } #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded); dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); - dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); - dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); - dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded); + dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1); + dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3); dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); @@ -3789,9 +3732,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth1 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 - >= Tvm_trips_rounded + >= st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / PrefetchBandwidth1 >= Tr0_trips_rounded) { + / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) { Case1OK = true; } else { Case1OK = false; @@ -3802,9 +3745,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth2 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 - >= Tvm_trips_rounded + >= st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / PrefetchBandwidth2 < Tr0_trips_rounded) { + / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) { Case2OK = true; } else { Case2OK = false; @@ -3815,9 +3758,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth3 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < - Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * + st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= - Tr0_trips_rounded) { + st_vars->Tr0_trips_rounded) { Case3OK = true; } else { Case3OK = false; @@ -3827,80 +3770,80 @@ bool dml32_CalculatePrefetchSchedule( } if (Case1OK) - prefetch_bw_equ = PrefetchBandwidth1; + st_vars->prefetch_bw_equ = PrefetchBandwidth1; else if (Case2OK) - prefetch_bw_equ = PrefetchBandwidth2; + st_vars->prefetch_bw_equ = PrefetchBandwidth2; else if (Case3OK) - prefetch_bw_equ = PrefetchBandwidth3; + st_vars->prefetch_bw_equ = PrefetchBandwidth3; else - prefetch_bw_equ = PrefetchBandwidth4; + st_vars->prefetch_bw_equ = PrefetchBandwidth4; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); - dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ); #endif - if (prefetch_bw_equ > 0) { + if (st_vars->prefetch_bw_equ > 0) { if (GPUVMEnable == true) { - Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * - HostVMInefficiencyFactor / prefetch_bw_equ, - Tvm_trips, LineTime / 4); + st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * + HostVMInefficiencyFactor / st_vars->prefetch_bw_equ, + st_vars->Tvm_trips, st_vars->LineTime / 4); } else { - Tvm_equ = LineTime / 4; + st_vars->Tvm_equ = st_vars->LineTime / 4; } if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * - HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, - (LineTime - Tvm_equ) / 2, LineTime / 4); + st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * + HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips, + (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4); } else { - Tr0_equ = (LineTime - Tvm_equ) / 2; + st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2; } } else { - Tvm_equ = 0; - Tr0_equ = 0; + st_vars->Tvm_equ = 0; + st_vars->Tr0_equ = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); #endif } } - if (dst_y_prefetch_oto < dst_y_prefetch_equ) { - *DestinationLinesForPrefetch = dst_y_prefetch_oto; - TimeForFetchingMetaPTE = Tvm_oto; - TimeForFetchingRowInVBlank = Tr0_oto; - *PrefetchBandwidth = prefetch_bw_oto; + if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) { + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto; + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto; + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto; + *PrefetchBandwidth = st_vars->prefetch_bw_oto; } else { - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - TimeForFetchingMetaPTE = Tvm_equ; - TimeForFetchingRowInVBlank = Tr0_equ; - *PrefetchBandwidth = prefetch_bw_equ; + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ; + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ; + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ; + *PrefetchBandwidth = st_vars->prefetch_bw_equ; } - *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0; *DestinationLinesToRequestRowInVBlank = - dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; + dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0; - LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - + st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); - dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData); #endif - if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { - *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; + if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) { + *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData; *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); @@ -3908,12 +3851,12 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); #endif if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / - LinesToRequestPrefetchPixelData, + st_vars->LinesToRequestPrefetchPixelData, (double) MaxNumSwathY * SwathHeightY / - (LinesToRequestPrefetchPixelData - + (st_vars->LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); } else { @@ -3927,7 +3870,7 @@ bool dml32_CalculatePrefetchSchedule( #endif } - *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData; *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); #ifdef __DML_VBA_DEBUG__ @@ -3936,11 +3879,11 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); #endif if ((SwathHeightC > 4)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { *VRatioPrefetchC = dml_max(*VRatioPrefetchC, (double) MaxNumSwathC * SwathHeightC / - (LinesToRequestPrefetchPixelData - + (st_vars->LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); } else { @@ -3955,25 +3898,25 @@ bool dml32_CalculatePrefetchSchedule( } *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY - / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub - / LineTime; + / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub + / st_vars->LineTime; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); #endif *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / - LinesToRequestPrefetchPixelData + st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC - * swath_width_chroma_ub / LineTime; + * swath_width_chroma_ub / st_vars->LineTime; } else { MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", - __func__, LinesToRequestPrefetchPixelData); + __func__, st_vars->LinesToRequestPrefetchPixelData); #endif *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; @@ -3982,15 +3925,15 @@ bool dml32_CalculatePrefetchSchedule( } #ifdef __DML_VBA_DEBUG__ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", - (double)LinesToRequestPrefetchPixelData * LineTime + - 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); - dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); + (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime + + 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE); + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE); dml_print("DML: To: %fus - time for propagation from scaler to optc\n", - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); + (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime); dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - - TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + - ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime - + st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler + + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup); dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); #endif @@ -3998,7 +3941,7 @@ bool dml32_CalculatePrefetchSchedule( MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", - __func__, dst_y_prefetch_equ); + __func__, st_vars->dst_y_prefetch_equ); #endif } @@ -4014,10 +3957,10 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); #endif prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / - (*DestinationLinesToRequestVMInVBlank * LineTime); + (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif @@ -4034,7 +3977,7 @@ bool dml32_CalculatePrefetchSchedule( prefetch_row_bw = 0; } else if (*DestinationLinesToRequestRowInVBlank > 0) { prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / - (*DestinationLinesToRequestRowInVBlank * LineTime); + (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); @@ -4057,12 +4000,12 @@ bool dml32_CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; + st_vars->TimeForFetchingMetaPTE = 0; + st_vars->TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; + st_vars->LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; @@ -4216,6 +4159,7 @@ void dml32_CalculateFlipSchedule( } // CalculateFlipSchedule void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, bool USRRetrainingRequiredFinal, enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int PrefetchMode, @@ -4277,37 +4221,15 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( double ActiveDRAMClockChangeLatencyMargin[]) { unsigned int i, j, k; - unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; - unsigned int DRAMClockChangeSupportNumber = 0; - unsigned int LastSurfaceWithoutMargin; - unsigned int DRAMClockChangeMethod = 0; - bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; - double MinActiveFCLKChangeMargin = 0.; - double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; - double ActiveClockChangeLatencyHidingY; - double ActiveClockChangeLatencyHidingC; - double ActiveClockChangeLatencyHiding; - double EffectiveDETBufferSizeY; - double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; - double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; - double TotalPixelBW = 0.0; - bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; - double EffectiveLBLatencyHidingY; - double EffectiveLBLatencyHidingC; - double LinesInDETY[DC__NUM_DPP__MAX]; - double LinesInDETC[DC__NUM_DPP__MAX]; - unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; - unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; - double FullDETBufferingTimeY; - double FullDETBufferingTimeC; - double WritebackDRAMClockChangeLatencyMargin; - double WritebackFCLKChangeLatencyMargin; - double WritebackLatencyHiding; - bool SameTimingForFCLKChange; - - unsigned int TotalActiveWriteback = 0; - unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; - unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; + + st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0; + st_vars->DRAMClockChangeSupportNumber = 0; + st_vars->DRAMClockChangeMethod = 0; + st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; + st_vars->MinActiveFCLKChangeMargin = 0.; + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; + st_vars->TotalPixelBW = 0.0; + st_vars->TotalActiveWriteback = 0; Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency @@ -4339,13 +4261,13 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif - TotalActiveWriteback = 0; + st_vars->TotalActiveWriteback = 0; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (WritebackEnable[k] == true) - TotalActiveWriteback = TotalActiveWriteback + 1; + st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1; } - if (TotalActiveWriteback <= 1) { + if (st_vars->TotalActiveWriteback <= 1) { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; } else { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency @@ -4355,7 +4277,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark + mmSOCParameters.USRRetrainingLatency; - if (TotalActiveWriteback <= 1) { + if (st_vars->TotalActiveWriteback <= 1) { Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.WritebackLatency; Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency @@ -4385,14 +4307,14 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + + st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); } for (k = 0; k < NumberOfActiveSurfaces; ++k) { - LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); - LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); + st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); + st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); #ifdef __DML_VBA_DEBUG__ @@ -4403,72 +4325,72 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); #endif - EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); - EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); - EffectiveDETBufferSizeY = DETBufferSizeY[k]; + st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); + st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k]; if (UnboundedRequestEnabled) { - EffectiveDETBufferSizeY = EffectiveDETBufferSizeY + st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY + CompressedBufferSizeInkByte * 1024 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k]) - / (HTotal[k] / PixelClock[k]) / TotalPixelBW; + / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW; } - LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; - LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; + st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]); + st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; - ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY + st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if (NumberOfActiveSurfaces > 1) { - ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY + st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; } if (BytePerPixelDETC[k] > 0) { - LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; - LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) + st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]); + st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; - ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC + st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if (NumberOfActiveSurfaces > 1) { - ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC + st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; } - ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, - ActiveClockChangeLatencyHidingC); + st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY, + st_vars->ActiveClockChangeLatencyHidingC); } else { - ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; + st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY; } - ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->DRAMClockChangeWatermark; - ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->FCLKChangeWatermark; - USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; + st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; if (WritebackEnable[k]) { - WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 + st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); if (WritebackPixelFormat[k] == dm_444_64) - WritebackLatencyHiding = WritebackLatencyHiding / 2; + st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2; - WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding + st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding - Watermark->WritebackDRAMClockChangeWatermark; - WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding + st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding - Watermark->WritebackFCLKChangeWatermark; ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], - WritebackFCLKChangeLatencyMargin); - ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], - WritebackDRAMClockChangeLatencyMargin); + st_vars->WritebackFCLKChangeLatencyMargin); + st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k], + st_vars->WritebackDRAMClockChangeLatencyMargin); } MaxActiveDRAMClockChangeLatencySupported[k] = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? @@ -4487,41 +4409,41 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (DRRDisplay[i] || DRRDisplay[j]))) { - SynchronizedSurfaces[i][j] = true; + st_vars->SynchronizedSurfaces[i][j] = true; } else { - SynchronizedSurfaces[i][j] = false; + st_vars->SynchronizedSurfaces[i][j] = false; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || - ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { - FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; - MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; - SurfaceWithMinActiveFCLKChangeMargin = k; + (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin || + st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) { + st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; + st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k]; + st_vars->SurfaceWithMinActiveFCLKChangeMargin = k; } } - *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; + *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; - SameTimingForFCLKChange = true; + st_vars->SameTimingForFCLKChange = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { + if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (SameTimingForFCLKChange || - ActiveFCLKChangeLatencyMargin[k] < - SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { - SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; + (st_vars->SameTimingForFCLKChange || + st_vars->ActiveFCLKChangeLatencyMargin[k] < + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k]; } - SameTimingForFCLKChange = false; + st_vars->SameTimingForFCLKChange = false; } } - if (MinActiveFCLKChangeMargin > 0) { + if (st_vars->MinActiveFCLKChangeMargin > 0) { *FCLKChangeSupport = dm_fclock_change_vactive; - } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && + } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && (PrefetchMode <= 1)) { *FCLKChangeSupport = dm_fclock_change_vblank; } else { @@ -4531,7 +4453,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( *USRRetrainingSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (USRRetrainingLatencyMargin[k] < 0)) { + (st_vars->USRRetrainingLatencyMargin[k] < 0)) { *USRRetrainingSupport = false; } } @@ -4542,42 +4464,42 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && ActiveDRAMClockChangeLatencyMargin[k] < 0) { if (PrefetchMode > 0) { - DRAMClockChangeSupportNumber = 2; - } else if (DRAMClockChangeSupportNumber == 0) { - DRAMClockChangeSupportNumber = 1; - LastSurfaceWithoutMargin = k; - } else if (DRAMClockChangeSupportNumber == 1 && - !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { - DRAMClockChangeSupportNumber = 2; + st_vars->DRAMClockChangeSupportNumber = 2; + } else if (st_vars->DRAMClockChangeSupportNumber == 0) { + st_vars->DRAMClockChangeSupportNumber = 1; + st_vars->LastSurfaceWithoutMargin = k; + } else if (st_vars->DRAMClockChangeSupportNumber == 1 && + !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) { + st_vars->DRAMClockChangeSupportNumber = 2; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - DRAMClockChangeMethod = 1; + st_vars->DRAMClockChangeMethod = 1; else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - DRAMClockChangeMethod = 2; + st_vars->DRAMClockChangeMethod = 2; } - if (DRAMClockChangeMethod == 0) { - if (DRAMClockChangeSupportNumber == 0) + if (st_vars->DRAMClockChangeMethod == 0) { + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; - } else if (DRAMClockChangeMethod == 1) { - if (DRAMClockChangeSupportNumber == 0) + } else if (st_vars->DRAMClockChangeMethod == 1) { + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } else { - if (DRAMClockChangeSupportNumber == 0) + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; @@ -4591,7 +4513,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); - src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; + src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k]; sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; #ifdef __DML_VBA_DEBUG__ @@ -4599,7 +4521,7 @@ dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DET dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); -dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); +dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]); dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); @@ -4610,7 +4532,7 @@ dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l if (BytePerPixelDETC[k] > 0) { src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); - src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; + src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k]; sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); |