aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
diff options
context:
space:
mode:
authorAurabindo Pillai <aurabindo.pillai@amd.com>2022-02-21 16:58:49 -0500
committerAlex Deucher <alexander.deucher@amd.com>2022-06-03 16:43:37 -0400
commitdda4fb85e433f761eedaed0005a2bc20d705d504 (patch)
tree0131cd8049db2ddbbf88429c7e9459bf9bc157f4 /drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
parentdrm/amd/display: add GPIO changes for DCN32/321 (diff)
downloadlinux-dev-dda4fb85e433f761eedaed0005a2bc20d705d504.tar.xz
linux-dev-dda4fb85e433f761eedaed0005a2bc20d705d504.zip
drm/amd/display: DML changes for DCN32/321
DML is required for display configuration modelling for things like bandwidth management and validation. Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c')
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c6254
1 files changed, 6254 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
new file mode 100644
index 000000000000..6509a84eeb64
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -0,0 +1,6254 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "display_mode_vba_util_32.h"
+#include "../dml_inline_defs.h"
+#include "display_mode_vba_32.h"
+#include "../display_mode_lib.h"
+
+unsigned int dml32_dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum output_format_class pixelFormat,
+ enum output_encoder_class Output)
+{
+ // valid bpc = source bits per component in the set of {8, 10, 12}
+ // valid bpp = increments of 1/16 of a bit
+ // min = 6/7/8 in N420/N422/444, respectively
+ // max = such that compression is 1:1
+ //valid sliceWidth = number of pixels per slice line,
+ // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+ //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
+ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+ // fixed value
+ unsigned int rcModelSize = 8192;
+
+ // N422/N420 operate at 2 pixels per clock
+ unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
+ Delay, pixels;
+
+ if (pixelFormat == dm_420)
+ pixelsPerClock = 2;
+ else if (pixelFormat == dm_n422)
+ pixelsPerClock = 2;
+ // #all other modes operate at 1 pixel per clock
+ else
+ pixelsPerClock = 1;
+
+ //initial transmit delay as per PPS
+ initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
+
+ //compute ssm delay
+ if (bpc == 8)
+ D = 81;
+ else if (bpc == 10)
+ D = 89;
+ else
+ D = 113;
+
+ //divide by pixel per cycle to compute slice width as seen by DSC
+ w = sliceWidth / pixelsPerClock;
+
+ //422 mode has an additional cycle of delay
+ if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
+ s = 0;
+ else
+ s = 1;
+
+ //main calculation for the dscce
+ ix = initalXmitDelay + 45;
+ wx = (w + 2) / 3;
+ p = 3 * wx - w;
+ l0 = ix / w;
+ a = ix + p * l0;
+ ax = (a + 2) / 3 + D + 6 + 1;
+ L = (ax + wx - 1) / wx;
+ if ((ix % w) == 0 && p != 0)
+ lstall = 1;
+ else
+ lstall = 0;
+ Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
+
+ //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
+ pixels = Delay * 3 * pixelsPerClock;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: bpc: %d\n", __func__, bpc);
+ dml_print("DML::%s: BPP: %f\n", __func__, BPP);
+ dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
+ dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
+ dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
+ dml_print("DML::%s: Output: %d\n", __func__, Output);
+ dml_print("DML::%s: pixels: %d\n", __func__, pixels);
+#endif
+
+ return pixels;
+}
+
+unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
+{
+ unsigned int Delay = 0;
+
+ if (pixelFormat == dm_420) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 2;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 13;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc gets pixels every other cycle
+ Delay = Delay + 3;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 1;
+ // dscc - input deserializer
+ Delay = Delay + 5;
+ // dscc - input cdc fifo
+ Delay = Delay + 25;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 10;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // sft
+ Delay = Delay + 1;
+ } else {
+ // sfr
+ Delay = Delay + 2;
+ // dsccif
+ Delay = Delay + 0;
+ // dscc - input deserializer
+ Delay = Delay + 3;
+ // dscc - input cdc fifo
+ Delay = Delay + 12;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // dscc - output cdc fifo
+ Delay = Delay + 7;
+ // dscc - output serializer
+ Delay = Delay + 1;
+ // dscc - cdc uncertainty
+ Delay = Delay + 2;
+ // sft
+ Delay = Delay + 1;
+ }
+
+ return Delay;
+}
+
+
+bool IsVertical(enum dm_rotation_angle Scan)
+{
+ bool is_vert = false;
+
+ if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
+ is_vert = true;
+ else
+ is_vert = false;
+ return is_vert;
+}
+
+void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
+ double HRatio,
+ double HRatioChroma,
+ double VRatio,
+ double VRatioChroma,
+ double MaxDCHUBToPSCLThroughput,
+ double MaxPSCLToLBThroughput,
+ double PixelClock,
+ enum source_format_class SourcePixelFormat,
+ unsigned int HTaps,
+ unsigned int HTapsChroma,
+ unsigned int VTaps,
+ unsigned int VTapsChroma,
+
+ /* output */
+ double *PSCL_THROUGHPUT,
+ double *PSCL_THROUGHPUT_CHROMA,
+ double *DPPCLKUsingSingleDPP)
+{
+ double DPPCLKUsingSingleDPPLuma;
+ double DPPCLKUsingSingleDPPChroma;
+
+ if (HRatio > 1) {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
+ dml_ceil((double) HTaps / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+
+ DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
+ *PSCL_THROUGHPUT, 1);
+
+ if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
+
+ if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
+ SourcePixelFormat != dm_rgbe_alpha)) {
+ *PSCL_THROUGHPUT_CHROMA = 0;
+ *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
+ } else {
+ if (HRatioChroma > 1) {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
+ HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+ DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
+ HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+ if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
+ *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
+ }
+}
+
+void dml32_CalculateBytePerPixelAndBlockSizes(
+ enum source_format_class SourcePixelFormat,
+ enum dm_swizzle_mode SurfaceTiling,
+
+ /* Output */
+ unsigned int *BytePerPixelY,
+ unsigned int *BytePerPixelC,
+ double *BytePerPixelDETY,
+ double *BytePerPixelDETC,
+ unsigned int *BlockHeight256BytesY,
+ unsigned int *BlockHeight256BytesC,
+ unsigned int *BlockWidth256BytesY,
+ unsigned int *BlockWidth256BytesC,
+ unsigned int *MacroTileHeightY,
+ unsigned int *MacroTileHeightC,
+ unsigned int *MacroTileWidthY,
+ unsigned int *MacroTileWidthC)
+{
+ if (SourcePixelFormat == dm_444_64) {
+ *BytePerPixelDETY = 8;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 8;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_16) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_444_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 0;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BytePerPixelDETY = 4;
+ *BytePerPixelDETC = 1;
+ *BytePerPixelY = 4;
+ *BytePerPixelC = 1;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BytePerPixelDETY = 1;
+ *BytePerPixelDETC = 2;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 2;
+ } else if (SourcePixelFormat == dm_420_12) {
+ *BytePerPixelDETY = 2;
+ *BytePerPixelDETC = 4;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ } else {
+ *BytePerPixelDETY = 4.0 / 3;
+ *BytePerPixelDETC = 8.0 / 3;
+ *BytePerPixelY = 2;
+ *BytePerPixelC = 4;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
+ dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
+ dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
+ dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
+#endif
+ if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
+ || SourcePixelFormat == dm_444_16
+ || SourcePixelFormat == dm_444_8
+ || SourcePixelFormat == dm_mono_16
+ || SourcePixelFormat == dm_mono_8
+ || SourcePixelFormat == dm_rgbe)) {
+ if (SurfaceTiling == dm_sw_linear)
+ *BlockHeight256BytesY = 1;
+ else if (SourcePixelFormat == dm_444_64)
+ *BlockHeight256BytesY = 4;
+ else if (SourcePixelFormat == dm_444_8)
+ *BlockHeight256BytesY = 16;
+ else
+ *BlockHeight256BytesY = 8;
+
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockHeight256BytesC = 0;
+ *BlockWidth256BytesC = 0;
+ } else {
+ if (SurfaceTiling == dm_sw_linear) {
+ *BlockHeight256BytesY = 1;
+ *BlockHeight256BytesC = 1;
+ } else if (SourcePixelFormat == dm_rgbe_alpha) {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 16;
+ } else if (SourcePixelFormat == dm_420_8) {
+ *BlockHeight256BytesY = 16;
+ *BlockHeight256BytesC = 8;
+ } else {
+ *BlockHeight256BytesY = 8;
+ *BlockHeight256BytesC = 8;
+ }
+ *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+ *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
+ dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
+ dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
+ dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
+#endif
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *MacroTileHeightY = *BlockHeight256BytesY;
+ *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
+ } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
+ SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
+ *MacroTileHeightY = 16 * *BlockHeight256BytesY;
+ *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = 16 * *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
+ } else {
+ *MacroTileHeightY = 32 * *BlockHeight256BytesY;
+ *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
+ *MacroTileHeightC = 32 * *BlockHeight256BytesC;
+ if (*MacroTileHeightC == 0)
+ *MacroTileWidthC = 0;
+ else
+ *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
+ dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
+ dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
+ dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
+#endif
+} // CalculateBytePerPixelAndBlockSizes
+
+void dml32_CalculatedoublePipeDPPCLKAndSCLThroughput(
+ double HRatio,
+ double HRatioChroma,
+ double VRatio,
+ double VRatioChroma,
+ double MaxDCHUBToPSCLThroughput,
+ double MaxPSCLToLBThroughput,
+ double PixelClock,
+ enum source_format_class SourcePixelFormat,
+ unsigned int HTaps,
+ unsigned int HTapsChroma,
+ unsigned int VTaps,
+ unsigned int VTapsChroma,
+
+ /* output */
+ double *PSCL_THROUGHPUT,
+ double *PSCL_THROUGHPUT_CHROMA,
+ double *DPPCLKUsingdoubleDPP)
+{
+ double DPPCLKUsingdoubleDPPLuma;
+ double DPPCLKUsingdoubleDPPChroma;
+
+ if (HRatio > 1) {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
+ dml_ceil((double) HTaps / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+
+ DPPCLKUsingdoubleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
+ *PSCL_THROUGHPUT, 1);
+
+ if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingdoubleDPPLuma < 2 * PixelClock)
+ DPPCLKUsingdoubleDPPLuma = 2 * PixelClock;
+
+ if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
+ SourcePixelFormat != dm_rgbe_alpha)) {
+ *PSCL_THROUGHPUT_CHROMA = 0;
+ *DPPCLKUsingdoubleDPP = DPPCLKUsingdoubleDPPLuma;
+ } else {
+ if (HRatioChroma > 1) {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
+ HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+ DPPCLKUsingdoubleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
+ HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+ if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingdoubleDPPChroma < 2 * PixelClock)
+ DPPCLKUsingdoubleDPPChroma = 2 * PixelClock;
+ *DPPCLKUsingdoubleDPP = dml_max(DPPCLKUsingdoubleDPPLuma, DPPCLKUsingdoubleDPPChroma);
+ }
+}
+
+void dml32_CalculateSwathAndDETConfiguration(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ double ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int nomDETInKByte,
+ enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum output_encoder_class Output[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double MaximumSwathWidthLuma[],
+ double MaximumSwathWidthChroma[],
+ enum dm_rotation_angle SourceRotation[],
+ bool ViewportStationary[],
+ enum source_format_class SourcePixelFormat[],
+ enum dm_swizzle_mode SurfaceTiling[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BlendingAndTiming[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ double BytePerPixDETY[],
+ double BytePerPixDETC[],
+ unsigned int HActive[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ double SwathWidth[],
+ double SwathWidthChroma[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ bool *UnboundedRequestEnabled,
+ unsigned int *CompressedBufferSizeInkByte,
+ bool ViewportSizeSupportPerSurface[],
+ bool *ViewportSizeSupport)
+{
+ unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+ unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpSwathSizeBytesY;
+ unsigned int RoundedUpSwathSizeBytesC;
+ double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
+ double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
+ unsigned int k;
+ unsigned int TotalActiveDPP = 0;
+ bool NoChromaSurfaces = true;
+ unsigned int DETBufferSizeInKByteForSwathCalculation;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+#endif
+ dml32_CalculateSwathWidth(ForceSingleDPP,
+ NumberOfActiveSurfaces,
+ SourcePixelFormat,
+ SourceRotation,
+ ViewportStationary,
+ ViewportWidth,
+ ViewportHeight,
+ ViewportXStart,
+ ViewportYStart,
+ ViewportXStartC,
+ ViewportYStartC,
+ SurfaceWidthY,
+ SurfaceWidthC,
+ SurfaceHeightY,
+ SurfaceHeightC,
+ ODMMode,
+ BytePerPixY,
+ BytePerPixC,
+ Read256BytesBlockHeightY,
+ Read256BytesBlockHeightC,
+ Read256BytesBlockWidthY,
+ Read256BytesBlockWidthC,
+ BlendingAndTiming,
+ HActive,
+ HRatio,
+ DPPPerSurface,
+
+ /* Output */
+ SwathWidthdoubleDPP,
+ SwathWidthdoubleDPPChroma,
+ SwathWidth,
+ SwathWidthChroma,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
+ swath_width_luma_ub,
+ swath_width_chroma_ub);
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
+ RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dm_420_10) {
+ RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
+ RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+ SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
+ NoChromaSurfaces = false;
+ }
+ }
+
+ *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP,
+ NoChromaSurfaces, Output[0]);
+
+ dml32_CalculateDETBufferSize(DETSizeOverride,
+ UseMALLForPStateChange,
+ ForceSingleDPP,
+ NumberOfActiveSurfaces,
+ *UnboundedRequestEnabled,
+ nomDETInKByte,
+ MaxTotalDETInKByte,
+ ConfigReturnBufferSizeInKByte,
+ MinCompressedBufferSizeInKByte,
+ CompressedBufferSegmentSizeInkByteFinal,
+ SourcePixelFormat,
+ ReadBandwidthLuma,
+ ReadBandwidthChroma,
+ RoundedUpMaxSwathSizeBytesY,
+ RoundedUpMaxSwathSizeBytesC,
+ DPPPerSurface,
+
+ /* Output */
+ DETBufferSizeInKByte, // per hubp pipe
+ CompressedBufferSizeInkByte);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
+ dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+#endif
+
+ *ViewportSizeSupport = true;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
+ dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
+ DETBufferSizeInKByteForSwathCalculation);
+#endif
+
+ if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
+ } else {
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
+ }
+
+ if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
+ || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
+ SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
+ *ViewportSizeSupport = false;
+ ViewportSizeSupportPerSurface[k] = false;
+ } else {
+ ViewportSizeSupportPerSurface[k] = true;
+ }
+
+ if (SwathHeightC[k] == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
+#endif
+ DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
+ DETBufferSizeC[k] = 0;
+ } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
+#endif
+ DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
+ DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
+ } else {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
+#endif
+ DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
+ DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
+ dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
+ k, RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
+ k, RoundedUpMaxSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
+ dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
+ dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
+ ViewportSizeSupportPerSurface[k]);
+#endif
+
+ }
+} // CalculateSwathAndDETConfiguration
+
+void dml32_CalculateSwathWidth(
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ enum source_format_class SourcePixelFormat[],
+ enum dm_rotation_angle SourceRotation[],
+ bool ViewportStationary[],
+ unsigned int ViewportWidth[],
+ unsigned int ViewportHeight[],
+ unsigned int ViewportXStart[],
+ unsigned int ViewportYStart[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ enum odm_combine_mode ODMMode[],
+ unsigned int BytePerPixY[],
+ unsigned int BytePerPixC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int BlendingAndTiming[],
+ unsigned int HActive[],
+ double HRatio[],
+ unsigned int DPPPerSurface[],
+
+ /* Output */
+ double SwathWidthdoubleDPPY[],
+ double SwathWidthdoubleDPPC[],
+ double SwathWidthY[], // per-pipe
+ double SwathWidthC[], // per-pipe
+ unsigned int MaximumSwathHeightY[],
+ unsigned int MaximumSwathHeightC[],
+ unsigned int swath_width_luma_ub[], // per-pipe
+ unsigned int swath_width_chroma_ub[]) // per-pipe
+{
+ unsigned int k, j;
+ enum odm_combine_mode MainSurfaceODMMode;
+
+ unsigned int surface_width_ub_l;
+ unsigned int surface_height_ub_l;
+ unsigned int surface_width_ub_c;
+ unsigned int surface_height_ub_c;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!IsVertical(SourceRotation[k]))
+ SwathWidthdoubleDPPY[k] = ViewportWidth[k];
+ else
+ SwathWidthdoubleDPPY[k] = ViewportHeight[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
+ dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
+#endif
+
+ MainSurfaceODMMode = ODMMode[k];
+ for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+ if (BlendingAndTiming[k] == j)
+ MainSurfaceODMMode = ODMMode[j];
+ }
+
+ if (ForceSingleDPP) {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+ } else {
+ if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
+ SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
+ dml_round(HActive[k] / 4.0 * HRatio[k]));
+ } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
+ SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
+ dml_round(HActive[k] / 2.0 * HRatio[k]));
+ } else if (DPPPerSurface[k] == 2) {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
+ } else {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
+ dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
+ dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
+ dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+ SourcePixelFormat[k] == dm_420_12) {
+ SwathWidthC[k] = SwathWidthY[k] / 2;
+ SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
+ } else {
+ SwathWidthC[k] = SwathWidthY[k];
+ SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
+ }
+
+ if (ForceSingleDPP == true) {
+ SwathWidthY[k] = SwathWidthdoubleDPPY[k];
+ SwathWidthC[k] = SwathWidthdoubleDPPC[k];
+ }
+
+ surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
+ surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+ surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+ surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
+ dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
+ dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
+ dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
+ dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
+ dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
+ dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
+ dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
+ dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
+ dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
+#endif
+
+ if (!IsVertical(SourceRotation[k])) {
+ MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
+ dml_floor(ViewportXStart[k] +
+ SwathWidthY[k] +
+ Read256BytesBlockWidthY[k] - 1,
+ Read256BytesBlockWidthY[k]) -
+ dml_floor(ViewportXStart[k],
+ Read256BytesBlockWidthY[k]));
+ } else {
+ swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
+ dml_ceil(SwathWidthY[k] - 1,
+ Read256BytesBlockWidthY[k]) +
+ Read256BytesBlockWidthY[k]);
+ }
+ if (BytePerPixC[k] > 0) {
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
+ dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
+ Read256BytesBlockWidthC[k] - 1,
+ Read256BytesBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k],
+ Read256BytesBlockWidthC[k]));
+ } else {
+ swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
+ dml_ceil(SwathWidthC[k] - 1,
+ Read256BytesBlockWidthC[k]) +
+ Read256BytesBlockWidthC[k]);
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ } else {
+ MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
+ SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
+ Read256BytesBlockHeightY[k]) -
+ dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
+ } else {
+ swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
+ Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
+ }
+ if (BytePerPixC[k] > 0) {
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
+ dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
+ Read256BytesBlockHeightC[k] - 1,
+ Read256BytesBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k],
+ Read256BytesBlockHeightC[k]));
+ } else {
+ swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
+ dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
+ Read256BytesBlockHeightC[k]);
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
+#endif
+
+ }
+} // CalculateSwathWidth
+
+bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+ unsigned int TotalNumberOfActiveDPP,
+ bool NoChroma,
+ enum output_encoder_class Output)
+{
+ bool ret_val = false;
+
+ ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
+ TotalNumberOfActiveDPP == 1 && NoChroma);
+ if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
+ ret_val = false;
+ return ret_val;
+}
+
+void dml32_CalculateDETBufferSize(
+ unsigned int DETSizeOverride[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ bool UnboundedRequestEnabled,
+ unsigned int nomDETInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ unsigned int CompressedBufferSegmentSizeInkByteFinal,
+ enum source_format_class SourcePixelFormat[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int RoundedUpMaxSwathSizeBytesY[],
+ unsigned int RoundedUpMaxSwathSizeBytesC[],
+ unsigned int DPPPerSurface[],
+ /* Output */
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int *CompressedBufferSizeInkByte)
+{
+ unsigned int DETBufferSizePoolInKByte;
+ unsigned int NextDETBufferPieceInKByte;
+ bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
+ bool NextPotentialSurfaceToAssignDETPieceFound;
+ unsigned int NextSurfaceToAssignDETPiece;
+ double TotalBandwidth;
+ double BandwidthOfSurfacesNotAssignedDETPiece;
+ unsigned int max_minDET;
+ unsigned int minDET;
+ unsigned int minDET_pipe;
+ unsigned int j, k;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
+ dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
+ dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
+ CompressedBufferSegmentSizeInkByteFinal);
+#endif
+
+ // Note: Will use default det size if that fits 2 swaths
+ if (UnboundedRequestEnabled) {
+ if (DETSizeOverride[0] > 0) {
+ DETBufferSizeInKByte[0] = DETSizeOverride[0];
+ } else {
+ DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
+ ((double) RoundedUpMaxSwathSizeBytesY[0] +
+ (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
+ }
+ *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
+ } else {
+ DETBufferSizePoolInKByte = MaxTotalDETInKByte;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DETBufferSizeInKByte[k] = nomDETInKByte;
+ if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
+ SourcePixelFormat[k] == dm_420_12) {
+ max_minDET = nomDETInKByte - 64;
+ } else {
+ max_minDET = nomDETInKByte;
+ }
+ minDET = 128;
+ minDET_pipe = 0;
+
+ // add DET resource until can hold 2 full swaths
+ while (minDET <= max_minDET && minDET_pipe == 0) {
+ if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
+ (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
+ minDET_pipe = minDET;
+ minDET = minDET + 64;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
+ dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
+ dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
+ RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+
+ if (minDET_pipe == 0) {
+ minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
+ (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
+ __func__, k, minDET_pipe);
+#endif
+ }
+
+ if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
+ DETBufferSizeInKByte[k] = 0;
+ } else if (DETSizeOverride[k] > 0) {
+ DETBufferSizeInKByte[k] = DETSizeOverride[k];
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
+ (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
+ } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
+ DETBufferSizeInKByte[k] = minDET_pipe;
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
+ (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
+ dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
+#endif
+ }
+
+ TotalBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
+ TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
+ for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
+ dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
+ dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
+#endif
+ BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
+ (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
+ ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
+ ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
+ } else {
+ DETPieceAssignedToThisSurfaceAlready[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
+ DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
+ BandwidthOfSurfacesNotAssignedDETPiece);
+#endif
+ }
+
+ for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+ NextPotentialSurfaceToAssignDETPieceFound = false;
+ NextSurfaceToAssignDETPiece = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
+ ReadBandwidthLuma[k]);
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
+ ReadBandwidthChroma[k]);
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
+ ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
+ NextSurfaceToAssignDETPiece);
+#endif
+ if (!DETPieceAssignedToThisSurfaceAlready[k] &&
+ (!NextPotentialSurfaceToAssignDETPieceFound ||
+ ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
+ ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
+ NextSurfaceToAssignDETPiece = k;
+ NextPotentialSurfaceToAssignDETPieceFound = true;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
+ __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
+ __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
+#endif
+ }
+
+ if (NextPotentialSurfaceToAssignDETPieceFound) {
+ // Note: To show the banker's rounding behavior in VBA and also the fact
+ // that the DET buffer size varies due to precision issue
+ //
+ //double tmp1 = ((double) DETBufferSizePoolInKByte *
+ // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+ // BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
+ // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+ //BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //
+ //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
+ //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
+
+ NextDETBufferPieceInKByte = dml_min(
+ dml_round((double) DETBufferSizePoolInKByte *
+ (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
+ BandwidthOfSurfacesNotAssignedDETPiece /
+ ((ForceSingleDPP ? 1 :
+ DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
+ (ForceSingleDPP ? 1 :
+ DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
+ dml_floor((double) DETBufferSizePoolInKByte,
+ (ForceSingleDPP ? 1 :
+ DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+
+ // Above calculation can assign the entire DET buffer allocation to a single pipe.
+ // We should limit the per-pipe DET size to the nominal / max per pipe.
+ if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
+ if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
+ nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
+ NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
+ } else {
+ // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
+ // already has the max per-pipe value
+ NextDETBufferPieceInKByte = 0;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
+ DETBufferSizePoolInKByte);
+ dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
+ NextSurfaceToAssignDETPiece);
+ dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
+ NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
+ NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
+ __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
+ dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
+ NextDETBufferPieceInKByte);
+ dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
+ __func__, j, NextSurfaceToAssignDETPiece,
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
+ + NextDETBufferPieceInKByte
+ / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
+ DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
+ (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
+ ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ }
+ }
+ *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
+ }
+ *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
+ for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
+ dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
+ __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+ }
+#endif
+} // CalculateDETBufferSize
+
+void dml32_CalculateODMMode(
+ unsigned int MaximumPixelsPerLinePerDSCUnit,
+ unsigned int HActive,
+ enum output_encoder_class Output,
+ enum odm_combine_policy ODMUse,
+ double StateDispclk,
+ double MaxDispclk,
+ bool DSCEnable,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int MaxNumDPP,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+
+ /* Output */
+ bool *TotalAvailablePipesSupport,
+ unsigned int *NumberOfDPP,
+ enum odm_combine_mode *ODMMode,
+ double *RequiredDISPCLKPerSurface)
+{
+
+ double SurfaceRequiredDISPCLKWithoutODMCombine;
+ double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+
+ SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
+ PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
+ MaxDispclk);
+ *TotalAvailablePipesSupport = true;
+ *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
+
+ if (ODMUse == dm_odm_combine_policy_none)
+ *ODMMode = dm_odm_combine_mode_disabled;
+
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
+ *NumberOfDPP = 0;
+
+ // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
+ // (ODMUse == "" || ODMUse == "CombineAsNeeded")
+
+ if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
+ ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
+ (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
+ if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
+ *ODMMode = dm_odm_combine_mode_4to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+ *NumberOfDPP = 4;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
+ (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
+ (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
+ if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
+ *ODMMode = dm_odm_combine_mode_2to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ *NumberOfDPP = 2;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else {
+ if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
+ *NumberOfDPP = 1;
+ else
+ *TotalAvailablePipesSupport = false;
+ }
+}
+
+double dml32_CalculateRequiredDispclk(
+ enum odm_combine_mode ODMMode,
+ double PixelClock,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKRampingMargin,
+ double DISPCLKDPPCLKVCOSpeed,
+ double MaxDispclk)
+{
+ double RequiredDispclk = 0.;
+ double PixelClockAfterODM;
+ double DISPCLKWithRampingRoundedToDFSGranularity;
+ double DISPCLKWithoutRampingRoundedToDFSGranularity;
+ double MaxDispclkRoundedDownToDFSGranularity;
+
+ if (ODMMode == dm_odm_combine_mode_4to1)
+ PixelClockAfterODM = PixelClock / 4;
+ else if (ODMMode == dm_odm_combine_mode_2to1)
+ PixelClockAfterODM = PixelClock / 2;
+ else
+ PixelClockAfterODM = PixelClock;
+
+
+ DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
+ PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
+ * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
+
+ DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
+ PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
+
+ MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
+
+ if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
+ RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
+ else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
+ RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
+ else
+ RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
+
+ return RequiredDispclk;
+}
+
+double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
+{
+ if (Clock <= 0.0)
+ return 0.0;
+
+ if (round_up)
+ return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
+ else
+ return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
+}
+
+void dml32_CalculateOutputLink(
+ double PHYCLKPerState,
+ double PHYCLKD18PerState,
+ double PHYCLKD32PerState,
+ double Downspreading,
+ bool IsMainSurfaceUsingTheIndicatedTiming,
+ enum output_encoder_class Output,
+ enum output_format_class OutputFormat,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClockBackEnd,
+ double ForcedOutputLinkBPP,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int NumberOfDSCSlices,
+ double AudioSampleRate,
+ unsigned int AudioSampleLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ bool DSCEnable,
+ unsigned int OutputLinkDPLanes,
+ enum dm_output_link_dp_rate OutputLinkDPRate,
+
+ /* Output */
+ bool *RequiresDSC,
+ double *RequiresFEC,
+ double *OutBpp,
+ enum dm_output_type *OutputType,
+ enum dm_output_rate *OutputRate,
+ unsigned int *RequiredSlots)
+{
+ bool LinkDSCEnable;
+ unsigned int dummy;
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = 0;
+ *OutputType = dm_output_type_unknown;
+ *OutputRate = dm_output_rate_unknown;
+
+ if (IsMainSurfaceUsingTheIndicatedTiming) {
+ if (Output == dm_hdmi) {
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
+ PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = "HDMI";
+ *OutputType = dm_output_type_hdmi;
+
+ } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
+ if (DSCEnable == true) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp || Output == dm_dp2p0)
+ *RequiresFEC = true;
+ else
+ *RequiresFEC = false;
+ } else {
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ if (Output == dm_dp2p0)
+ *RequiresFEC = true;
+ else
+ *RequiresFEC = false;
+ }
+ if (Output == dm_dp2p0) {
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
+ PHYCLKD32PerState >= 10000 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR10";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr10;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
+ *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR13p5";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
+ *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR20";
+ *OutputType = dm_output_type_dp2p0;
+ *OutputRate = dm_output_rate_dp_rate_uhbr20;
+ }
+ } else {
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
+ PHYCLKPerState >= 270) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
+ *OutBpp == 0 && PHYCLKPerState >= 540) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
+ DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
+ AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
+ ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR2";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr2;
+ }
+ if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
+ AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
+ RequiredSlots);
+
+ if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dm_dp)
+ *RequiresFEC = true;
+
+ *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
+ OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
+ ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent,
+ NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
+ ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR3";
+ *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
+ *OutputRate = dm_output_rate_dp_rate_hbr3;
+ }
+ }
+ }
+ }
+}
+
+void dml32_CalculateDPPCLK(
+ unsigned int NumberOfActiveSurfaces,
+ double DISPCLKDPPCLKDSCCLKDownSpreading,
+ double DISPCLKDPPCLKVCOSpeed,
+ double DPPCLKUsingSingleDPP[],
+ unsigned int DPPPerSurface[],
+
+ /* output */
+ double *GlobalDPPCLK,
+ double Dppclk[])
+{
+ unsigned int k;
+ *GlobalDPPCLK = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
+ *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
+ }
+ *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
+ for (k = 0; k < NumberOfActiveSurfaces; ++k)
+ Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
+}
+
+double dml32_TruncToValidBPP(
+ double LinkBitRate,
+ unsigned int Lanes,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClock,
+ double DesiredBPP,
+ bool DSCEnable,
+ enum output_encoder_class Output,
+ enum output_format_class Format,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int DSCSlices,
+ unsigned int AudioRate,
+ unsigned int AudioLayout,
+ enum odm_combine_mode ODMModeNoDSC,
+ enum odm_combine_mode ODMModeDSC,
+ /* Output */
+ unsigned int *RequiredSlots)
+{
+ double MaxLinkBPP;
+ unsigned int MinDSCBPP;
+ double MaxDSCBPP;
+ unsigned int NonDSCBPP0;
+ unsigned int NonDSCBPP1;
+ unsigned int NonDSCBPP2;
+ unsigned int NonDSCBPP3;
+
+ if (Format == dm_420) {
+ NonDSCBPP0 = 12;
+ NonDSCBPP1 = 15;
+ NonDSCBPP2 = 18;
+ MinDSCBPP = 6;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
+ } else if (Format == dm_444) {
+ NonDSCBPP0 = 18;
+ NonDSCBPP1 = 24;
+ NonDSCBPP2 = 30;
+ NonDSCBPP3 = 36;
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
+ } else {
+ if (Output == dm_hdmi) {
+ NonDSCBPP0 = 24;
+ NonDSCBPP1 = 24;
+ NonDSCBPP2 = 24;
+ } else {
+ NonDSCBPP0 = 16;
+ NonDSCBPP1 = 20;
+ NonDSCBPP2 = 24;
+ }
+ if (Format == dm_n422) {
+ MinDSCBPP = 7;
+ MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
+ } else {
+ MinDSCBPP = 8;
+ MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
+ }
+ }
+ if (Output == dm_dp2p0) {
+ MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
+ } else if (DSCEnable && Output == dm_dp) {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
+ } else {
+ MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
+ }
+
+ if (DSCEnable) {
+ if (ODMModeDSC == dm_odm_combine_mode_4to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+ else if (ODMModeDSC == dm_odm_combine_mode_2to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+ else if (ODMModeDSC == dm_odm_split_mode_1to2)
+ MaxLinkBPP = 2 * MaxLinkBPP;
+ } else {
+ if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 16);
+ else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
+ MaxLinkBPP = dml_min(MaxLinkBPP, 32);
+ else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
+ MaxLinkBPP = 2 * MaxLinkBPP;
+ }
+
+ if (DesiredBPP == 0) {
+ if (DSCEnable) {
+ if (MaxLinkBPP < MinDSCBPP)
+ return BPP_INVALID;
+ else if (MaxLinkBPP >= MaxDSCBPP)
+ return MaxDSCBPP;
+ else
+ return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
+ } else {
+ if (MaxLinkBPP >= NonDSCBPP3)
+ return NonDSCBPP3;
+ else if (MaxLinkBPP >= NonDSCBPP2)
+ return NonDSCBPP2;
+ else if (MaxLinkBPP >= NonDSCBPP1)
+ return NonDSCBPP1;
+ else if (MaxLinkBPP >= NonDSCBPP0)
+ return 16.0;
+ else
+ return BPP_INVALID;
+ }
+ } else {
+ if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
+ DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
+ (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
+ return BPP_INVALID;
+ else
+ return DesiredBPP;
+ }
+
+ *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
+
+ return BPP_INVALID;
+} // TruncToValidBPP
+
+double dml32_RequiredDTBCLK(
+ bool DSCEnable,
+ double PixelClock,
+ enum output_format_class OutputFormat,
+ double OutputBpp,
+ unsigned int DSCSlices,
+ unsigned int HTotal,
+ unsigned int HActive,
+ unsigned int AudioRate,
+ unsigned int AudioLayout)
+{
+ double PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
+ double HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
+ dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
+ double HCBlank = 64 + 32 *
+ dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
+ double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
+ double HActiveTribyteRate = PixelWordRate * HCActive / HActive;
+
+ if (DSCEnable != true)
+ return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
+
+ return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
+}
+
+unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
+ enum odm_combine_mode ODMMode,
+ unsigned int DSCInputBitPerComponent,
+ double OutputBpp,
+ unsigned int HActive,
+ unsigned int HTotal,
+ unsigned int NumberOfDSCSlices,
+ enum output_format_class OutputFormat,
+ enum output_encoder_class Output,
+ double PixelClock,
+ double PixelClockBackEnd)
+{
+ unsigned int DSCDelayRequirement_val;
+
+ if (DSCEnabled == true && OutputBpp != 0) {
+ if (ODMMode == dm_odm_combine_mode_4to1) {
+ DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+ dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
+ OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
+ } else if (ODMMode == dm_odm_combine_mode_2to1) {
+ DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+ dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
+ OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
+ } else {
+ DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
+ dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
+ OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
+ }
+
+ DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
+ dml_ceil(DSCDelayRequirement_val / HActive, 1);
+
+ DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
+
+ } else {
+ DSCDelayRequirement_val = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
+ dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
+ dml_print("DML::%s: HActive = %d\n", __func__, HActive);
+ dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
+ dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
+ dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
+ dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
+#endif
+
+ return DSCDelayRequirement_val;
+}
+
+void dml32_CalculateSurfaceSizeInMall(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCN,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ bool DCCEnable[],
+ bool ViewportStationary[],
+ unsigned int ViewportXStartY[],
+ unsigned int ViewportYStartY[],
+ unsigned int ViewportXStartC[],
+ unsigned int ViewportYStartC[],
+ unsigned int ViewportWidthY[],
+ unsigned int ViewportHeightY[],
+ unsigned int BytesPerPixelY[],
+ unsigned int ViewportWidthC[],
+ unsigned int ViewportHeightC[],
+ unsigned int BytesPerPixelC[],
+ unsigned int SurfaceWidthY[],
+ unsigned int SurfaceWidthC[],
+ unsigned int SurfaceHeightY[],
+ unsigned int SurfaceHeightC[],
+ unsigned int Read256BytesBlockWidthY[],
+ unsigned int Read256BytesBlockWidthC[],
+ unsigned int Read256BytesBlockHeightY[],
+ unsigned int Read256BytesBlockHeightC[],
+ unsigned int ReadBlockWidthY[],
+ unsigned int ReadBlockWidthC[],
+ unsigned int ReadBlockHeightY[],
+ unsigned int ReadBlockHeightC[],
+
+ /* Output */
+ unsigned int SurfaceSizeInMALL[],
+ bool *ExceededMALLSize)
+{
+ unsigned int TotalSurfaceSizeInMALL = 0;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ViewportStationary[k]) {
+ SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
+ dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
+ ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
+ ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
+ ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
+ ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
+ dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
+
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
+ dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
+ ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
+ dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
+ ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
+ BytesPerPixelC[k];
+ }
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
+ dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
+ Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
+ - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
+ * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
+ Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
+ ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
+ Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
+ * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], 8 *
+ Read256BytesBlockWidthC[k]),
+ dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
+ * Read256BytesBlockWidthC[k] - 1, 8 *
+ Read256BytesBlockWidthC[k]) -
+ dml_floor(ViewportXStartC[k], 8 *
+ Read256BytesBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], 8 *
+ Read256BytesBlockHeightC[k]),
+ dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
+ 8 * Read256BytesBlockHeightC[k] - 1, 8 *
+ Read256BytesBlockHeightC[k]) -
+ dml_floor(ViewportYStartC[k], 8 *
+ Read256BytesBlockHeightC[k])) *
+ BytesPerPixelC[k] / 256;
+ }
+ }
+ } else {
+ SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
+ ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
+ ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
+ BytesPerPixelY[k];
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
+ ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
+ ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
+ BytesPerPixelC[k];
+ }
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
+ Read256BytesBlockWidthY[k] - 1), 8 *
+ Read256BytesBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
+ Read256BytesBlockHeightY[k] - 1), 8 *
+ Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
+
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
+ Read256BytesBlockWidthC[k] - 1), 8 *
+ Read256BytesBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
+ Read256BytesBlockHeightC[k] - 1), 8 *
+ Read256BytesBlockHeightC[k]) *
+ BytesPerPixelC[k] / 256;
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
+ }
+ *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
+} // CalculateSurfaceSizeInMall
+
+void dml32_CalculateVMRowAndSwath(
+ unsigned int NumberOfActiveSurfaces,
+ DmlPipe myPipe[],
+ unsigned int SurfaceSizeInMALL[],
+ unsigned int PTEBufferSizeInRequestsLuma,
+ unsigned int PTEBufferSizeInRequestsChroma,
+ unsigned int DCCMetaBufferSizeBytes,
+ enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int MALLAllocatedForDCN,
+ double SwathWidthY[],
+ double SwathWidthC[],
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes[],
+ unsigned int HostVMMinPageSize,
+
+ /* Output */
+ bool PTEBufferSizeNotExceeded[],
+ bool DCCMetaBufferSizeNotExceeded[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int dpte_row_height_luma[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
+ unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int PTERequestSizeC[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+ double PrefetchSourceLinesY[],
+ double PrefetchSourceLinesC[],
+ double VInitPreFillY[],
+ double VInitPreFillC[],
+ unsigned int MaxNumSwathY[],
+ unsigned int MaxNumSwathC[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+ double PixelPTEBytesPerRow[],
+ double PDEAndMetaPTEBytesFrame[],
+ double MetaRowByte[],
+ bool use_one_row_for_frame[],
+ bool use_one_row_for_frame_flip[],
+ bool UsesMALLForStaticScreen[],
+ bool PTE_BUFFER_MODE[],
+ unsigned int BIGK_FRAGMENT_SIZE[])
+{
+ unsigned int k;
+ unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
+ unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
+ unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
+ bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (HostVMEnable == true) {
+ vm_group_bytes[k] = 512;
+ dpte_group_bytes[k] = 512;
+ } else if (GPUVMEnable == true) {
+ vm_group_bytes[k] = 2048;
+ if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
+ dpte_group_bytes[k] = 512;
+ else
+ dpte_group_bytes[k] = 2048;
+ } else {
+ vm_group_bytes[k] = 0;
+ dpte_group_bytes[k] = 0;
+ }
+
+ if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
+ myPipe[k].SourcePixelFormat == dm_420_12 ||
+ myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
+ if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
+ !IsVertical(myPipe[k].SourceRotation)) {
+ PTEBufferSizeInRequestsForLuma[k] =
+ (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
+ }
+
+ PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
+ myPipe[k].ViewportStationary,
+ myPipe[k].DCCEnable,
+ myPipe[k].DPPPerSurface,
+ myPipe[k].BlockHeight256BytesC,
+ myPipe[k].BlockWidth256BytesC,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].SurfaceTiling,
+ myPipe[k].BytePerPixelC,
+ myPipe[k].SourceRotation,
+ SwathWidthC[k],
+ myPipe[k].ViewportHeightChroma,
+ myPipe[k].ViewportXStartC,
+ myPipe[k].ViewportYStartC,
+ GPUVMEnable,
+ HostVMEnable,
+ HostVMMaxNonCachedPageTableLevels,
+ GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes[k],
+ HostVMMinPageSize,
+ PTEBufferSizeInRequestsForChroma[k],
+ myPipe[k].PitchC,
+ myPipe[k].DCCMetaPitchC,
+ myPipe[k].BlockWidthC,
+ myPipe[k].BlockHeightC,
+
+ /* Output */
+ &MetaRowByteC[k],
+ &PixelPTEBytesPerRowC[k],
+ &dpte_row_width_chroma_ub[k],
+ &dpte_row_height_chroma[k],
+ &dpte_row_height_linear_chroma[k],
+ &PixelPTEBytesPerRowC_one_row_per_frame[k],
+ &dpte_row_width_chroma_ub_one_row_per_frame[k],
+ &dpte_row_height_chroma_one_row_per_frame[k],
+ &meta_req_width_chroma[k],
+ &meta_req_height_chroma[k],
+ &meta_row_width_chroma[k],
+ &meta_row_height_chroma[k],
+ &PixelPTEReqWidthC[k],
+ &PixelPTEReqHeightC[k],
+ &PTERequestSizeC[k],
+ &dpde0_bytes_per_frame_ub_c[k],
+ &meta_pte_bytes_per_frame_ub_c[k]);
+
+ PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
+ myPipe[k].VRatioChroma,
+ myPipe[k].VTapsChroma,
+ myPipe[k].InterlaceEnable,
+ myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ myPipe[k].SwathHeightC,
+ myPipe[k].SourceRotation,
+ myPipe[k].ViewportStationary,
+ SwathWidthC[k],
+ myPipe[k].ViewportHeightChroma,
+ myPipe[k].ViewportXStartC,
+ myPipe[k].ViewportYStartC,
+
+ /* Output */
+ &VInitPreFillC[k],
+ &MaxNumSwathC[k]);
+ } else {
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
+ PTEBufferSizeInRequestsForChroma[k] = 0;
+ PixelPTEBytesPerRowC[k] = 0;
+ PDEAndMetaPTEBytesFrameC = 0;
+ MetaRowByteC[k] = 0;
+ MaxNumSwathC[k] = 0;
+ PrefetchSourceLinesC[k] = 0;
+ dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ }
+
+ PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
+ myPipe[k].ViewportStationary,
+ myPipe[k].DCCEnable,
+ myPipe[k].DPPPerSurface,
+ myPipe[k].BlockHeight256BytesY,
+ myPipe[k].BlockWidth256BytesY,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].SurfaceTiling,
+ myPipe[k].BytePerPixelY,
+ myPipe[k].SourceRotation,
+ SwathWidthY[k],
+ myPipe[k].ViewportHeight,
+ myPipe[k].ViewportXStart,
+ myPipe[k].ViewportYStart,
+ GPUVMEnable,
+ HostVMEnable,
+ HostVMMaxNonCachedPageTableLevels,
+ GPUVMMaxPageTableLevels,
+ GPUVMMinPageSizeKBytes[k],
+ HostVMMinPageSize,
+ PTEBufferSizeInRequestsForLuma[k],
+ myPipe[k].PitchY,
+ myPipe[k].DCCMetaPitchY,
+ myPipe[k].BlockWidthY,
+ myPipe[k].BlockHeightY,
+
+ /* Output */
+ &MetaRowByteY[k],
+ &PixelPTEBytesPerRowY[k],
+ &dpte_row_width_luma_ub[k],
+ &dpte_row_height_luma[k],
+ &dpte_row_height_linear_luma[k],
+ &PixelPTEBytesPerRowY_one_row_per_frame[k],
+ &dpte_row_width_luma_ub_one_row_per_frame[k],
+ &dpte_row_height_luma_one_row_per_frame[k],
+ &meta_req_width[k],
+ &meta_req_height[k],
+ &meta_row_width[k],
+ &meta_row_height[k],
+ &PixelPTEReqWidthY[k],
+ &PixelPTEReqHeightY[k],
+ &PTERequestSizeY[k],
+ &dpde0_bytes_per_frame_ub_l[k],
+ &meta_pte_bytes_per_frame_ub_l[k]);
+
+ PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
+ myPipe[k].VRatio,
+ myPipe[k].VTaps,
+ myPipe[k].InterlaceEnable,
+ myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ myPipe[k].SwathHeightY,
+ myPipe[k].SourceRotation,
+ myPipe[k].ViewportStationary,
+ SwathWidthY[k],
+ myPipe[k].ViewportHeight,
+ myPipe[k].ViewportXStart,
+ myPipe[k].ViewportYStart,
+
+ /* Output */
+ &VInitPreFillY[k],
+ &MaxNumSwathY[k]);
+
+ PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+ MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
+
+ if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
+ PTEBufferSizeNotExceeded[k] = true;
+ } else {
+ PTEBufferSizeNotExceeded[k] = false;
+ }
+
+ one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
+ PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
+ }
+
+ dml32_CalculateMALLUseForStaticScreen(
+ NumberOfActiveSurfaces,
+ MALLAllocatedForDCN,
+ UseMALLForStaticScreen, // mode
+ SurfaceSizeInMALL,
+ one_row_per_frame_fits_in_buffer,
+ /* Output */
+ UsesMALLForStaticScreen); // boolen
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
+ (GPUVMMinPageSizeKBytes[k] > 64);
+ BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
+ dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
+#endif
+ use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
+ (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
+
+ use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
+ !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
+
+ if (use_one_row_for_frame[k]) {
+ dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
+ dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
+ dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
+ dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
+ PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
+ }
+
+ if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
+ DCCMetaBufferSizeNotExceeded[k] = true;
+ else
+ DCCMetaBufferSizeNotExceeded[k] = false;
+
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
+ if (use_one_row_for_frame[k])
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
+
+ dml32_CalculateRowBandwidth(
+ GPUVMEnable,
+ myPipe[k].SourcePixelFormat,
+ myPipe[k].VRatio,
+ myPipe[k].VRatioChroma,
+ myPipe[k].DCCEnable,
+ myPipe[k].HTotal / myPipe[k].PixelClock,
+ MetaRowByteY[k], MetaRowByteC[k],
+ meta_row_height[k],
+ meta_row_height_chroma[k],
+ PixelPTEBytesPerRowY[k],
+ PixelPTEBytesPerRowC[k],
+ dpte_row_height_luma[k],
+ dpte_row_height_chroma[k],
+
+ /* Output */
+ &meta_row_bw[k],
+ &dpte_row_bw[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
+ __func__, k, use_one_row_for_frame_flip[k]);
+ dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
+ __func__, k, UseMALLForPStateChange[k]);
+ dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
+ dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
+ __func__, k, dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
+ __func__, k, dpte_row_height_chroma[k]);
+ dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
+ __func__, k, dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
+ dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
+ __func__, k, PTEBufferSizeNotExceeded[k]);
+ dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
+ dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
+#endif
+ }
+} // CalculateVMRowAndSwath
+
+unsigned int dml32_CalculateVMAndRowBytes(
+ bool ViewportStationary,
+ bool DCCEnable,
+ unsigned int NumberOfDPPs,
+ unsigned int BlockHeight256Bytes,
+ unsigned int BlockWidth256Bytes,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceTiling,
+ unsigned int BytePerPixel,
+ enum dm_rotation_angle SourceRotation,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int GPUVMMinPageSizeKBytes,
+ unsigned int HostVMMinPageSize,
+ unsigned int PTEBufferSizeInRequests,
+ unsigned int Pitch,
+ unsigned int DCCMetaPitch,
+ unsigned int MacroTileWidth,
+ unsigned int MacroTileHeight,
+
+ /* Output */
+ unsigned int *MetaRowByte,
+ unsigned int *PixelPTEBytesPerRow,
+ unsigned int *dpte_row_width_ub,
+ unsigned int *dpte_row_height,
+ unsigned int *dpte_row_height_linear,
+ unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
+ unsigned int *dpte_row_width_ub_one_row_per_frame,
+ unsigned int *dpte_row_height_one_row_per_frame,
+ unsigned int *MetaRequestWidth,
+ unsigned int *MetaRequestHeight,
+ unsigned int *meta_row_width,
+ unsigned int *meta_row_height,
+ unsigned int *PixelPTEReqWidth,
+ unsigned int *PixelPTEReqHeight,
+ unsigned int *PTERequestSize,
+ unsigned int *DPDE0BytesFrame,
+ unsigned int *MetaPTEBytesFrame)
+{
+ unsigned int MPDEBytesFrame;
+ unsigned int DCCMetaSurfaceBytes;
+ unsigned int ExtraDPDEBytesFrame;
+ unsigned int PDEAndMetaPTEBytesFrame;
+ unsigned int HostVMDynamicLevels = 0;
+ unsigned int MacroTileSizeBytes;
+ unsigned int vp_height_meta_ub;
+ unsigned int vp_height_dpte_ub;
+ unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048)
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ else
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ }
+
+ *MetaRequestHeight = 8 * BlockHeight256Bytes;
+ *MetaRequestWidth = 8 * BlockWidth256Bytes;
+ if (SurfaceTiling == dm_sw_linear) {
+ *meta_row_height = 32;
+ *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
+ - dml_floor(ViewportXStart, *MetaRequestWidth);
+ } else if (!IsVertical(SourceRotation)) {
+ *meta_row_height = *MetaRequestHeight;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
+ *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
+ } else {
+ *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
+ }
+ *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
+ } else {
+ *meta_row_height = *MetaRequestWidth;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
+ *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
+ } else {
+ *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
+ }
+ *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
+ }
+
+ if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
+ vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
+ 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
+ } else if (!IsVertical(SourceRotation)) {
+ vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
+ } else {
+ vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
+ }
+
+ DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
+
+ if (GPUVMEnable == true) {
+ *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
+ (8 * 4.0 * 1024), 1) + 1) * 64;
+ MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
+ } else {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ }
+
+ if (DCCEnable != true) {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ *MetaRowByte = 0;
+ }
+
+ MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
+
+ if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
+ if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
+ vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
+ MacroTileHeight - 1, MacroTileHeight) -
+ dml_floor(ViewportYStart, MacroTileHeight);
+ } else if (!IsVertical(SourceRotation)) {
+ vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
+ } else {
+ vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
+ }
+ *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
+ (8 * 2097152), 1) + 1);
+ ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
+ } else {
+ *DPDE0BytesFrame = 0;
+ ExtraDPDEBytesFrame = 0;
+ vp_height_dpte_ub = 0;
+ }
+
+ PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
+ dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
+ dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
+ dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
+ dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
+ dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
+ dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
+ dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
+ dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
+ dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
+ dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
+ dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
+ dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
+#endif
+
+ if (HostVMEnable == true)
+ PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *PixelPTEReqHeight = 1;
+ *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ *PTERequestSize = 64;
+ } else if (GPUVMMinPageSizeKBytes == 4) {
+ *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+ *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
+ *PTERequestSize = 128;
+ } else {
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
+ *PTERequestSize = 64;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
+ dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
+ dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
+ dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
+ dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
+#endif
+
+ *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
+ *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
+ (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
+ (double) *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
+ *PTERequestSize;
+
+ if (SurfaceTiling == dm_sw_linear) {
+ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+ *PixelPTEReqWidth / Pitch), 1));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
+ PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
+ dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
+ dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
+ dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
+ dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
+ 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+ *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
+#endif
+ *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
+ (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
+
+ // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
+ *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
+ PixelPTEReqWidth_linear / Pitch), 1);
+ if (*dpte_row_height_linear > 128)
+ *dpte_row_height_linear = 128;
+
+ } else if (!IsVertical(SourceRotation)) {
+ *dpte_row_height = *PixelPTEReqHeight;
+
+ if (GPUVMMinPageSizeKBytes > 64) {
+ *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
+ *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ } else if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
+ *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
+ dml_floor(ViewportXStart, *PixelPTEReqWidth);
+ } else {
+ *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
+ *PixelPTEReqWidth;
+ }
+
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else {
+ *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
+
+ if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
+ *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
+ } else {
+ *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
+ * *PixelPTEReqHeight;
+ }
+
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
+ }
+
+ if (GPUVMEnable != true)
+ *PixelPTEBytesPerRow = 0;
+ if (HostVMEnable == true)
+ *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
+ dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
+ dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
+ dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
+ dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
+ dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
+ __func__, *dpte_row_width_ub_one_row_per_frame);
+ dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
+ __func__, *PixelPTEBytesPerRow_one_row_per_frame);
+ dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
+ *MetaPTEBytesFrame);
+#endif
+
+ return PDEAndMetaPTEBytesFrame;
+} // CalculateVMAndRowBytes
+
+double dml32_CalculatePrefetchSourceLines(
+ double VRatio,
+ unsigned int VTaps,
+ bool Interlace,
+ bool ProgressiveToInterlaceUnitInOPP,
+ unsigned int SwathHeight,
+ enum dm_rotation_angle SourceRotation,
+ bool ViewportStationary,
+ double SwathWidth,
+ unsigned int ViewportHeight,
+ unsigned int ViewportXStart,
+ unsigned int ViewportYStart,
+
+ /* Output */
+ double *VInitPreFill,
+ unsigned int *MaxNumSwath)
+{
+
+ unsigned int vp_start_rot;
+ unsigned int sw0_tmp;
+ unsigned int MaxPartialSwath;
+ double numLines;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
+ dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
+ dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
+ dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
+ dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
+ dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
+#endif
+ if (ProgressiveToInterlaceUnitInOPP)
+ *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
+ else
+ *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
+
+ if (ViewportStationary) {
+ if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
+ vp_start_rot = SwathHeight -
+ (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
+ } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
+ vp_start_rot = ViewportXStart;
+ } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
+ vp_start_rot = SwathHeight -
+ (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
+ } else {
+ vp_start_rot = ViewportYStart;
+ }
+ sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
+ if (sw0_tmp < *VInitPreFill)
+ *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
+ else
+ *MaxNumSwath = 1;
+ MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
+ } else {
+ *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
+ if (*VInitPreFill > 1)
+ MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
+ else
+ MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
+ }
+ numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
+ dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
+ dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
+ dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
+ dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
+#endif
+ return numLines;
+
+} // CalculatePrefetchSourceLines
+
+void dml32_CalculateMALLUseForStaticScreen(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MALLAllocatedForDCNFinal,
+ enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+ unsigned int SurfaceSizeInMALL[],
+ bool one_row_per_frame_fits_in_buffer[],
+
+ /* output */
+ bool UsesMALLForStaticScreen[])
+{
+ unsigned int k;
+ unsigned int SurfaceToAddToMALL;
+ bool CanAddAnotherSurfaceToMALL;
+ unsigned int TotalSurfaceSizeInMALL;
+
+ TotalSurfaceSizeInMALL = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
+ if (UsesMALLForStaticScreen[k])
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
+ dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
+#endif
+ }
+
+ SurfaceToAddToMALL = 0;
+ CanAddAnotherSurfaceToMALL = true;
+ while (CanAddAnotherSurfaceToMALL) {
+ CanAddAnotherSurfaceToMALL = false;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
+ !UsesMALLForStaticScreen[k] &&
+ UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
+ one_row_per_frame_fits_in_buffer[k] &&
+ (!CanAddAnotherSurfaceToMALL ||
+ SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
+ CanAddAnotherSurfaceToMALL = true;
+ SurfaceToAddToMALL = k;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
+ __func__, k, UseMALLForStaticScreen[k]);
+#endif
+ }
+ }
+ if (CanAddAnotherSurfaceToMALL) {
+ UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
+ dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
+#endif
+
+ }
+ }
+}
+
+void dml32_CalculateRowBandwidth(
+ bool GPUVMEnable,
+ enum source_format_class SourcePixelFormat,
+ double VRatio,
+ double VRatioChroma,
+ bool DCCEnable,
+ double LineTime,
+ unsigned int MetaRowByteLuma,
+ unsigned int MetaRowByteChroma,
+ unsigned int meta_row_height_luma,
+ unsigned int meta_row_height_chroma,
+ unsigned int PixelPTEBytesPerRowLuma,
+ unsigned int PixelPTEBytesPerRowChroma,
+ unsigned int dpte_row_height_luma,
+ unsigned int dpte_row_height_chroma,
+ /* Output */
+ double *meta_row_bw,
+ double *dpte_row_bw)
+{
+ if (DCCEnable != true) {
+ *meta_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
+ SourcePixelFormat == dm_rgbe_alpha) {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
+ MetaRowByteChroma / (meta_row_height_chroma * LineTime);
+ } else {
+ *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
+ }
+
+ if (GPUVMEnable != true) {
+ *dpte_row_bw = 0;
+ } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
+ SourcePixelFormat == dm_rgbe_alpha) {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
+ VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
+ } else {
+ *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+ }
+}
+
+double dml32_CalculateUrgentLatency(
+ double UrgentLatencyPixelDataOnly,
+ double UrgentLatencyPixelMixedWithVMData,
+ double UrgentLatencyVMDataOnly,
+ bool DoUrgentLatencyAdjustment,
+ double UrgentLatencyAdjustmentFabricClockComponent,
+ double UrgentLatencyAdjustmentFabricClockReference,
+ double FabricClock)
+{
+ double ret;
+
+ ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+ if (DoUrgentLatencyAdjustment == true) {
+ ret = ret + UrgentLatencyAdjustmentFabricClockComponent *
+ (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+ }
+ return ret;
+}
+
+void dml32_CalculateUrgentBurstFactor(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double LineTime,
+ double UrgentLatency,
+ double CursorBufferSize,
+ unsigned int CursorWidth,
+ unsigned int CursorBPP,
+ double VRatio,
+ double VRatioC,
+ double BytePerPixelInDETY,
+ double BytePerPixelInDETC,
+ unsigned int DETBufferSizeY,
+ unsigned int DETBufferSizeC,
+ /* Output */
+ double *UrgentBurstFactorCursor,
+ double *UrgentBurstFactorLuma,
+ double *UrgentBurstFactorChroma,
+ bool *NotEnoughUrgentLatencyHiding)
+{
+ double LinesInDETLuma;
+ double LinesInDETChroma;
+ unsigned int LinesInCursorBuffer;
+ double CursorBufferSizeInTime;
+ double DETBufferSizeInTimeLuma;
+ double DETBufferSizeInTimeChroma;
+
+ *NotEnoughUrgentLatencyHiding = 0;
+
+ if (CursorWidth > 0) {
+ LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
+ (CursorWidth * CursorBPP / 8.0)), 1.0);
+ if (VRatio > 0) {
+ CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
+ if (CursorBufferSizeInTime - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorCursor = 0;
+ } else {
+ *UrgentBurstFactorCursor = CursorBufferSizeInTime /
+ (CursorBufferSizeInTime - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorCursor = 1;
+ }
+ }
+
+ LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
+ DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
+
+ if (VRatio > 0) {
+ DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
+ if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorLuma = 0;
+ } else {
+ *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorLuma = 1;
+ }
+
+ if (BytePerPixelInDETC > 0) {
+ LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
+ 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
+ / swath_width_chroma_ub;
+
+ if (VRatio > 0) {
+ DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
+ if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
+ *NotEnoughUrgentLatencyHiding = 1;
+ *UrgentBurstFactorChroma = 0;
+ } else {
+ *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
+ / (DETBufferSizeInTimeChroma - UrgentLatency);
+ }
+ } else {
+ *UrgentBurstFactorChroma = 1;
+ }
+ }
+} // CalculateUrgentBurstFactor
+
+void dml32_CalculateDCFCLKDeepSleep(
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ double VRatio[],
+ double VRatioChroma[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int ReturnBusWidth,
+
+ /* Output */
+ double *DCFClkDeepSleep)
+{
+ unsigned int k;
+ double DisplayPipeLineDeliveryTimeLuma;
+ double DisplayPipeLineDeliveryTimeChroma;
+ double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
+ double ReadBandwidth = 0.0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
+ / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+ }
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
+ DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
+ / Dppclk[k];
+ }
+ }
+
+ if (BytePerPixelC[k] > 0) {
+ DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
+ BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
+ __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
+ 32.0 / DisplayPipeLineDeliveryTimeChroma);
+ } else {
+ DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
+ 64.0 / DisplayPipeLineDeliveryTimeLuma;
+ }
+ DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k)
+ ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+
+ *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
+ dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
+ dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
+ dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k)
+ *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
+#endif
+} // CalculateDCFCLKDeepSleep
+
+double dml32_CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackDestinationWidth,
+ unsigned int WritebackDestinationHeight,
+ unsigned int WritebackSourceHeight,
+ unsigned int HTotal)
+{
+ double CalculateWriteBackDelay;
+ double Line_length;
+ double Output_lines_last_notclamped;
+ double WritebackVInit;
+
+ WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
+ Line_length = dml_max((double) WritebackDestinationWidth,
+ dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
+ Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
+ dml_ceil(((double)WritebackSourceHeight -
+ (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
+ if (Output_lines_last_notclamped < 0) {
+ CalculateWriteBackDelay = 0;
+ } else {
+ CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
+ (HTotal - WritebackDestinationWidth) + 80;
+ }
+ return CalculateWriteBackDelay;
+}
+
+void dml32_UseMinimumDCFCLK(
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool DRRDisplay[],
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int MaxPrefetchMode,
+ double DRAMClockChangeLatencyFinal,
+ double FCLKChangeLatency,
+ double SREnterPlusExitTime,
+ unsigned int ReturnBusWidth,
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool DynamicMetadataVMEnabled,
+ bool ImmediateFlipRequirement,
+ bool ProgressiveToInterlaceUnitInOPP,
+ double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
+ double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
+ unsigned int VTotal[],
+ unsigned int VActive[],
+ unsigned int DynamicMetadataTransmittedBytes[],
+ unsigned int DynamicMetadataLinesBeforeActiveRequired[],
+ bool Interlace[],
+ double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
+ double RequiredDISPCLK[][2],
+ double UrgLatency[],
+ unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
+ double ProjectedDCFClkDeepSleep[][2],
+ double MaximumVStartup[][2][DC__NUM_DPP__MAX],
+ unsigned int TotalNumberOfActiveDPP[][2],
+ unsigned int TotalNumberOfDCCActiveDPP[][2],
+ unsigned int dpte_group_bytes[],
+ double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
+ double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
+ double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
+ double MetaRowBytes[][2][DC__NUM_DPP__MAX],
+ bool DynamicMetadataEnable[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double DCFCLKPerState[],
+ /* Output */
+ double DCFCLKState[][2])
+{
+ unsigned int i, j, k;
+ unsigned int dummy1;
+ double dummy2, dummy3;
+ double NormalEfficiency;
+ double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
+
+ NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
+ for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
+ for (j = 0; j <= 1; ++j) {
+ double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
+ double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
+ double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
+ double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
+ double MinimumTWait = 0.0;
+ double DPTEBandwidth;
+ double DCFCLKRequiredForAverageBandwidth;
+ unsigned int ExtraLatencyBytes;
+ double ExtraLatencyCycles;
+ double DCFCLKRequiredForPeakBandwidth;
+ unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
+ double MinimumTvmPlus2Tr0;
+
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
+ + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
+ / (15.75 * HTotal[k] / PixelClock[k]);
+ }
+
+ for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
+ NoOfDPPState[k] = NoOfDPP[i][j][k];
+
+ DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
+ DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
+
+ ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
+ TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
+ TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
+ NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
+ HostVMMaxNonCachedPageTableLevels);
+ ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
+ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ double DCFCLKCyclesRequiredInPrefetch;
+ double PrefetchTime;
+
+ PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
+ * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
+ + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
+ * BytePerPixelC[k]) / NormalEfficiency
+ / ReturnBusWidth;
+ DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
+ + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
+ / NormalEfficiency / ReturnBusWidth
+ * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+ + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
+ / ReturnBusWidth
+ + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
+ + PixelDCFCLKCyclesRequiredInPrefetch[k];
+ PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
+ * HTotal[k] / PixelClock[k];
+ DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
+ DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
+ UrgLatency[i] * GPUVMMaxPageTableLevels *
+ (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+
+ MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
+ UseMALLForPStateChange[k],
+ SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ DRRDisplay[k],
+ DRAMClockChangeLatencyFinal,
+ FCLKChangeLatency,
+ UrgLatency[i],
+ SREnterPlusExitTime);
+
+ PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
+ MinimumTWait - UrgLatency[i] *
+ ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
+ GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
+ HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
+ DynamicMetadataVMExtraLatency[k];
+
+ if (PrefetchTime > 0) {
+ double ExpectedVRatioPrefetch;
+
+ ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
+ PixelDCFCLKCyclesRequiredInPrefetch[k] /
+ DCFCLKCyclesRequiredInPrefetch);
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
+ PixelDCFCLKCyclesRequiredInPrefetch[k] /
+ PrefetchPixelLinesTime[k] *
+ dml_max(1.0, ExpectedVRatioPrefetch) *
+ dml_max(1.0, ExpectedVRatioPrefetch / 4);
+ if (HostVMEnable == true || ImmediateFlipRequirement == true) {
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] =
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] +
+ NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
+ NormalEfficiency / ReturnBusWidth;
+ }
+ } else {
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
+ }
+ if (DynamicMetadataEnable[k] == true) {
+ double TSetupPipe;
+ double TdmbfPipe;
+ double TdmsksPipe;
+ double TdmecPipe;
+ double AllowedTimeForUrgentExtraLatency;
+
+ dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ MaxInterDCNTileRepeaters,
+ RequiredDPPCLKPerSurface[i][j][k],
+ RequiredDISPCLK[i][j],
+ ProjectedDCFClkDeepSleep[i][j],
+ PixelClock[k],
+ HTotal[k],
+ VTotal[k] - VActive[k],
+ DynamicMetadataTransmittedBytes[k],
+ DynamicMetadataLinesBeforeActiveRequired[k],
+ Interlace[k],
+ ProgressiveToInterlaceUnitInOPP,
+
+ /* output */
+ &TSetupPipe,
+ &TdmbfPipe,
+ &TdmecPipe,
+ &TdmsksPipe,
+ &dummy1,
+ &dummy2,
+ &dummy3);
+ AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
+ PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
+ TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
+ if (AllowedTimeForUrgentExtraLatency > 0)
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] =
+ dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
+ ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
+ else
+ DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
+ }
+ }
+ DCFCLKRequiredForPeakBandwidth = 0;
+ for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
+ DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
+ DCFCLKRequiredForPeakBandwidthPerSurface[k];
+ }
+ MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
+ (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
+ (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ double MaximumTvmPlus2Tr0PlusTsw;
+
+ MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
+ PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+ if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
+ DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
+ } else {
+ DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
+ 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
+ MinimumTvmPlus2Tr0 -
+ PrefetchPixelLinesTime[k] / 4),
+ (2 * ExtraLatencyCycles +
+ PixelDCFCLKCyclesRequiredInPrefetch[k]) /
+ (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
+ }
+ }
+ DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
+ dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
+ }
+ }
+}
+
+unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+ unsigned int k;
+ double ret;
+ unsigned int HostVMDynamicLevels;
+
+ if (GPUVMEnable == true && HostVMEnable == true) {
+ if (HostVMMinPageSize < 2048)
+ HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
+ else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
+ else
+ HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
+ } else {
+ HostVMDynamicLevels = 0;
+ }
+
+ ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+
+ if (GPUVMEnable == true) {
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] *
+ (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
+ }
+ }
+ return ret;
+}
+
+void dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ unsigned int MaxInterDCNTileRepeaters,
+ double Dppclk,
+ double Dispclk,
+ double DCFClkDeepSleep,
+ double PixelClock,
+ unsigned int HTotal,
+ unsigned int VBlank,
+ unsigned int DynamicMetadataTransmittedBytes,
+ unsigned int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int InterlaceEnable,
+ bool ProgressiveToInterlaceUnitInOPP,
+
+ /* output */
+ double *TSetup,
+ double *Tdmbf,
+ double *Tdmec,
+ double *Tdmsks,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ double TotalRepeaterDelayTime;
+
+ TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
+ *VUpdateWidthPix =
+ dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
+ *VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
+ TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
+ *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
+ *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+ *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
+ *Tdmec = HTotal / PixelClock;
+
+ if (DynamicMetadataLinesBeforeActiveRequired == 0)
+ *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
+ else
+ *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+
+ if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
+ *Tdmsks = *Tdmsks / 2;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix);
+ dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix);
+ dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
+
+ dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
+ __func__, DynamicMetadataLinesBeforeActiveRequired);
+ dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
+ dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
+ dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+ dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
+#endif
+}
+
+double dml32_CalculateTWait(
+ unsigned int PrefetchMode,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ bool DRRDisplay,
+ double DRAMClockChangeLatency,
+ double FCLKChangeLatency,
+ double UrgentLatency,
+ double SREnterPlusExitTime)
+{
+ double TWait = 0.0;
+
+ if (PrefetchMode == 0 &&
+ !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
+ !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
+ !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
+ !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
+ TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+ } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
+ TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+ } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
+ TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
+ } else {
+ TWait = UrgentLatency;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
+ dml_print("DML::%s: TWait = %f\n", __func__, TWait);
+#endif
+ return TWait;
+} // CalculateTWait
+
+// Function: get_return_bw_mbps
+// Megabyte per second
+double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const bool HostVMEnable,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed)
+{
+ double ReturnBW = 0.;
+ double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
+ double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
+ double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
+ double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
+ IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
+ IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+ soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
+ double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
+ IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
+ IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+ soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
+
+ if (HostVMEnable != true)
+ ReturnBW = PixelDataOnlyReturnBW;
+ else
+ ReturnBW = PixelMixedWithVMDataReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
+ dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+ dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+ dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+ dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
+ dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
+ dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
+ dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
+ dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
+ dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
+#endif
+ return ReturnBW;
+}
+
+// Function: get_return_bw_mbps_vm_only
+// Megabyte per second
+double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
+ const int VoltageLevel,
+ const double DCFCLK,
+ const double FabricClock,
+ const double DRAMSpeed)
+{
+ double VMDataOnlyReturnBW = dml_min3(
+ soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
+ FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
+ * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
+ DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
+ * (VoltageLevel < 2 ?
+ soc->pct_ideal_dram_bw_after_urgent_strobe :
+ soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+ dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+ dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
+#endif
+ return VMDataOnlyReturnBW;
+}
+
+double dml32_CalculateExtraLatency(
+ unsigned int RoundTripPingLatencyCycles,
+ unsigned int ReorderingBytes,
+ double DCFCLK,
+ unsigned int TotalNumberOfActiveDPP,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int TotalNumberOfDCCActiveDPP,
+ unsigned int MetaChunkSize,
+ double ReturnBW,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int NumberOfDPP[],
+ unsigned int dpte_group_bytes[],
+ double HostVMInefficiencyFactor,
+ double HostVMMinPageSize,
+ unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+ double ExtraLatencyBytes;
+ double ExtraLatency;
+
+ ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
+ ReorderingBytes,
+ TotalNumberOfActiveDPP,
+ PixelChunkSizeInKByte,
+ TotalNumberOfDCCActiveDPP,
+ MetaChunkSize,
+ GPUVMEnable,
+ HostVMEnable,
+ NumberOfActiveSurfaces,
+ NumberOfDPP,
+ dpte_group_bytes,
+ HostVMInefficiencyFactor,
+ HostVMMinPageSize,
+ HostVMMaxNonCachedPageTableLevels);
+
+ ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
+ dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+ dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
+ dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+ dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
+#endif
+
+ return ExtraLatency;
+} // CalculateExtraLatency
+
+bool dml32_CalculatePrefetchSchedule(
+ double HostVMInefficiencyFactor,
+ DmlPipe *myPipe,
+ unsigned int DSCDelay,
+ double DPPCLKDelaySubtotalPlusCNVCFormater,
+ double DPPCLKDelaySCL,
+ double DPPCLKDelaySCLLBOnly,
+ double DPPCLKDelayCNVCCursor,
+ double DISPCLKDelaySubtotal,
+ unsigned int DPP_RECOUT_WIDTH,
+ enum output_format_class OutputFormat,
+ unsigned int MaxInterDCNTileRepeaters,
+ unsigned int VStartup,
+ unsigned int MaxVStartup,
+ unsigned int GPUVMPageTableLevels,
+ bool GPUVMEnable,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ double HostVMMinPageSize,
+ bool DynamicMetadataEnable,
+ bool DynamicMetadataVMEnabled,
+ int DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int DynamicMetadataTransmittedBytes,
+ double UrgentLatency,
+ double UrgentExtraLatency,
+ double TCalc,
+ unsigned int PDEAndMetaPTEBytesFrame,
+ unsigned int MetaRowByte,
+ unsigned int PixelPTEBytesPerRow,
+ double PrefetchSourceLinesY,
+ unsigned int SwathWidthY,
+ unsigned int VInitPreFillY,
+ unsigned int MaxNumSwathY,
+ double PrefetchSourceLinesC,
+ unsigned int SwathWidthC,
+ unsigned int VInitPreFillC,
+ unsigned int MaxNumSwathC,
+ unsigned int swath_width_luma_ub,
+ unsigned int swath_width_chroma_ub,
+ unsigned int SwathHeightY,
+ unsigned int SwathHeightC,
+ double TWait,
+ /* Output */
+ double *DSTXAfterScaler,
+ double *DSTYAfterScaler,
+ double *DestinationLinesForPrefetch,
+ double *PrefetchBandwidth,
+ double *DestinationLinesToRequestVMInVBlank,
+ double *DestinationLinesToRequestRowInVBlank,
+ double *VRatioPrefetchY,
+ double *VRatioPrefetchC,
+ double *RequiredPrefetchPixDataBWLuma,
+ double *RequiredPrefetchPixDataBWChroma,
+ bool *NotEnoughTimeForDynamicMetadata,
+ double *Tno_bw,
+ double *prefetch_vmrow_bw,
+ double *Tdmdl_vm,
+ double *Tdmdl,
+ double *TSetup,
+ unsigned int *VUpdateOffsetPix,
+ double *VUpdateWidthPix,
+ double *VReadyOffsetPix)
+{
+ bool MyError = false;
+ unsigned int DPPCycles, DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ;
+ double prefetch_bw_oto;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingMetaPTE = 0;
+ double TimeForFetchingRowInVBlank = 0;
+ double LinesToRequestPrefetchPixelData = 0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips;
+ double Tr0_trips;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double Lsw_oto;
+ double Tpre_rounded;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double prefetch_sw_bytes;
+ double bytes_pp;
+ double dep_bytes;
+ unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
+ double min_Lsw;
+ double Tsw_est1 = 0;
+ double Tsw_est3 = 0;
+
+ if (GPUVMEnable == true && HostVMEnable == true)
+ HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ else
+ HostVMDynamicLevelsTrips = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
+ dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
+ __func__, HostVMEnable, HostVMInefficiencyFactor);
+#endif
+ dml32_CalculateVUpdateAndDynamicMetadataParameters(
+ MaxInterDCNTileRepeaters,
+ myPipe->Dppclk,
+ myPipe->Dispclk,
+ myPipe->DCFClkDeepSleep,
+ myPipe->PixelClock,
+ myPipe->HTotal,
+ myPipe->VBlank,
+ DynamicMetadataTransmittedBytes,
+ DynamicMetadataLinesBeforeActiveRequired,
+ myPipe->InterlaceEnable,
+ myPipe->ProgressiveToInterlaceUnitInOPP,
+ TSetup,
+
+ /* output */
+ &Tdmbf,
+ &Tdmec,
+ &Tdmsks,
+ VUpdateOffsetPix,
+ VUpdateWidthPix,
+ VReadyOffsetPix);
+
+ LineTime = myPipe->HTotal / myPipe->PixelClock;
+ trip_to_mem = UrgentLatency;
+ Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
+
+ if (DynamicMetadataVMEnabled == true)
+ *Tdmdl = TWait + Tvm_trips + trip_to_mem;
+ else
+ *Tdmdl = TWait + UrgentExtraLatency;
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (DynamicMetadataEnable == false)
+ *Tdmdl = 0.0;
+#endif
+
+ if (DynamicMetadataEnable == true) {
+ if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
+ *NotEnoughTimeForDynamicMetadata = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
+ __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
+ __func__, Tdmsks);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
+ __func__, *Tdmdl);
+#endif
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
+ GPUVMEnable == true ? TWait + Tvm_trips : 0);
+
+ if (myPipe->ScalerEnabled)
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
+ else
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
+
+ DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
+
+ DISPCLKCycles = DISPCLKDelaySubtotal;
+
+ if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
+ return true;
+
+ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
+ myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
+
+ *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
+ + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
+ + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
+ myPipe->HActive / 2 : 0)
+ + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
+ dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
+ dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
+ dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
+ dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
+ dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
+ dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
+ dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
+ dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
+#endif
+
+ if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
+ *DSTYAfterScaler = 1;
+ else
+ *DSTYAfterScaler = 0;
+
+ DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
+ dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
+#endif
+
+ MyError = false;
+
+ Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
+
+ if (GPUVMEnable == true) {
+ Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
+ if (GPUVMPageTableLevels >= 3) {
+ *Tno_bw = UrgentExtraLatency + trip_to_mem *
+ (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
+ } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
+ Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
+ 4.0 * LineTime; // VBA_ERROR
+ *Tno_bw = UrgentExtraLatency;
+ } else {
+ *Tno_bw = 0;
+ }
+ } else if (myPipe->DCCEnable == true) {
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
+ *Tno_bw = 0;
+ } else {
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = LineTime / 2.0;
+ *Tno_bw = 0;
+ }
+ Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
+ Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
+
+ if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
+ || myPipe->SourcePixelFormat == dm_420_12) {
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+ } else {
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+ }
+
+ prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
+ prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
+ prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
+
+ min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
+ min_Lsw = dml_max(min_Lsw, 1.0);
+ Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
+
+ if (GPUVMEnable == true) {
+ Tvm_oto = dml_max3(
+ Tvm_trips,
+ *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
+ LineTime / 4.0);
+ } else
+ Tvm_oto = LineTime / 4.0;
+
+ if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_oto = dml_max4(
+ Tr0_trips,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
+ (LineTime - Tvm_oto)/2.0,
+ LineTime / 4.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
+ dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
+#endif
+ } else
+ Tr0_oto = (LineTime - Tvm_oto) / 2.0;
+
+ Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+ Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
+
+ dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
+ (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
+ dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
+ dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
+ dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
+ dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
+ dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
+ dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+ dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
+ dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
+ dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
+ dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
+ dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
+#endif
+
+ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ Tpre_rounded = dst_y_prefetch_equ * LineTime;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
+ dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
+ dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
+ dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
+ __func__, VStartup * LineTime);
+ dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
+ dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
+ dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
+ dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
+ __func__, *DSTYAfterScaler);
+#endif
+ dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
+ MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
+
+ if (prefetch_sw_bytes < dep_bytes)
+ prefetch_sw_bytes = 2 * dep_bytes;
+
+ *PrefetchBandwidth = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ if (dst_y_prefetch_equ > 1) {
+ double PrefetchBandwidth1;
+ double PrefetchBandwidth2;
+ double PrefetchBandwidth3;
+ double PrefetchBandwidth4;
+
+ if (Tpre_rounded - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+ Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
+ } else
+ PrefetchBandwidth1 = 0;
+
+ if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
+ && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
+ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
+ }
+
+ if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
+ (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
+ else
+ PrefetchBandwidth2 = 0;
+
+ if (Tpre_rounded - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
+ + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+ Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
+ } else
+ PrefetchBandwidth3 = 0;
+
+
+ if (VStartup == MaxVStartup &&
+ (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
+ LineTime - Tvm_trips_rounded > 0) {
+ PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
+ }
+
+ if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
+ PrefetchBandwidth4 = prefetch_sw_bytes /
+ (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
+ } else {
+ PrefetchBandwidth4 = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
+ dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+ dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
+ dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
+ dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
+ dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
+ dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
+ dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
+#endif
+ {
+ bool Case1OK;
+ bool Case2OK;
+ bool Case3OK;
+
+ if (PrefetchBandwidth1 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
+ >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / PrefetchBandwidth1 >= Tr0_trips_rounded) {
+ Case1OK = true;
+ } else {
+ Case1OK = false;
+ }
+ } else {
+ Case1OK = false;
+ }
+
+ if (PrefetchBandwidth2 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
+ >= Tvm_trips_rounded
+ && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
+ / PrefetchBandwidth2 < Tr0_trips_rounded) {
+ Case2OK = true;
+ } else {
+ Case2OK = false;
+ }
+ } else {
+ Case2OK = false;
+ }
+
+ if (PrefetchBandwidth3 > 0) {
+ if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
+ Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
+ Tr0_trips_rounded) {
+ Case3OK = true;
+ } else {
+ Case3OK = false;
+ }
+ } else {
+ Case3OK = false;
+ }
+
+ if (Case1OK)
+ prefetch_bw_equ = PrefetchBandwidth1;
+ else if (Case2OK)
+ prefetch_bw_equ = PrefetchBandwidth2;
+ else if (Case3OK)
+ prefetch_bw_equ = PrefetchBandwidth3;
+ else
+ prefetch_bw_equ = PrefetchBandwidth4;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
+ dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
+ dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
+#endif
+
+ if (prefetch_bw_equ > 0) {
+ if (GPUVMEnable == true) {
+ Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
+ HostVMInefficiencyFactor / prefetch_bw_equ,
+ Tvm_trips, LineTime / 4);
+ } else {
+ Tvm_equ = LineTime / 4;
+ }
+
+ if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
+ (LineTime - Tvm_equ) / 2, LineTime / 4);
+ } else {
+ Tr0_equ = (LineTime - Tvm_equ) / 2;
+ }
+ } else {
+ Tvm_equ = 0;
+ Tr0_equ = 0;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
+#endif
+ }
+ }
+
+ if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
+ TimeForFetchingMetaPTE = Tvm_oto;
+ TimeForFetchingRowInVBlank = Tr0_oto;
+ *PrefetchBandwidth = prefetch_bw_oto;
+ } else {
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ TimeForFetchingMetaPTE = Tvm_equ;
+ TimeForFetchingRowInVBlank = Tr0_equ;
+ *PrefetchBandwidth = prefetch_bw_equ;
+ }
+
+ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
+
+ *DestinationLinesToRequestRowInVBlank =
+ dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
+
+ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
+ *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
+#endif
+
+ if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
+ *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
+ dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
+#endif
+ if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+ *VRatioPrefetchY =
+ dml_max((double) PrefetchSourceLinesY /
+ LinesToRequestPrefetchPixelData,
+ (double) MaxNumSwathY * SwathHeightY /
+ (LinesToRequestPrefetchPixelData -
+ (VInitPreFillY - 3.0) / 2.0));
+ *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
+ } else {
+ MyError = true;
+ *VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
+ dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
+#endif
+ }
+
+ *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
+ dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
+#endif
+ if ((SwathHeightC > 4)) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+ *VRatioPrefetchC =
+ dml_max(*VRatioPrefetchC,
+ (double) MaxNumSwathC * SwathHeightC /
+ (LinesToRequestPrefetchPixelData -
+ (VInitPreFillC - 3.0) / 2.0));
+ *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
+ } else {
+ MyError = true;
+ *VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
+ dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
+#endif
+ }
+
+ *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
+ / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
+ / LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
+ dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+ dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
+ __func__, *RequiredPrefetchPixDataBWLuma);
+#endif
+ *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
+ LinesToRequestPrefetchPixelData
+ * myPipe->BytePerPixelC
+ * swath_width_chroma_ub / LineTime;
+ } else {
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
+ __func__, LinesToRequestPrefetchPixelData);
+#endif
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
+ (double)LinesToRequestPrefetchPixelData * LineTime +
+ 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
+ dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
+ (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
+ dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
+ dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
+ TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
+ ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
+ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
+ PixelPTEBytesPerRow);
+#endif
+ } else {
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
+ __func__, dst_y_prefetch_equ);
+#endif
+ }
+
+ {
+ double prefetch_vm_bw;
+ double prefetch_row_bw;
+
+ if (PDEAndMetaPTEBytesFrame == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*DestinationLinesToRequestVMInVBlank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+ prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
+ (*DestinationLinesToRequestVMInVBlank * LineTime);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
+ __func__, *DestinationLinesToRequestVMInVBlank);
+#endif
+ }
+
+ if (MetaRowByte + PixelPTEBytesPerRow == 0) {
+ prefetch_row_bw = 0;
+ } else if (*DestinationLinesToRequestRowInVBlank > 0) {
+ prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInVBlank * LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ MyError = true;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
+ __func__, *DestinationLinesToRequestRowInVBlank);
+#endif
+ }
+
+ *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (MyError) {
+ *PrefetchBandwidth = 0;
+ TimeForFetchingMetaPTE = 0;
+ TimeForFetchingRowInVBlank = 0;
+ *DestinationLinesToRequestVMInVBlank = 0;
+ *DestinationLinesToRequestRowInVBlank = 0;
+ *DestinationLinesForPrefetch = 0;
+ LinesToRequestPrefetchPixelData = 0;
+ *VRatioPrefetchY = 0;
+ *VRatioPrefetchC = 0;
+ *RequiredPrefetchPixDataBWLuma = 0;
+ *RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ return MyError;
+} // CalculatePrefetchSchedule
+
+void dml32_CalculateFlipSchedule(
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+ unsigned int GPUVMMaxPageTableLevels,
+ bool HostVMEnable,
+ unsigned int HostVMMaxNonCachedPageTableLevels,
+ bool GPUVMEnable,
+ double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+ double DPTEBytesPerRow,
+ double BandwidthAvailableForImmediateFlip,
+ unsigned int TotImmediateFlipBytes,
+ enum source_format_class SourcePixelFormat,
+ double LineTime,
+ double VRatio,
+ double VRatioChroma,
+ double Tno_bw,
+ bool DCCEnable,
+ unsigned int dpte_row_height,
+ unsigned int meta_row_height,
+ unsigned int dpte_row_height_chroma,
+ unsigned int meta_row_height_chroma,
+ bool use_one_row_for_frame_flip,
+
+ /* Output */
+ double *DestinationLinesToRequestVMInImmediateFlip,
+ double *DestinationLinesToRequestRowInImmediateFlip,
+ double *final_flip_bw,
+ bool *ImmediateFlipSupportedForPipe)
+{
+ double min_row_time = 0.0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double TimeForFetchingMetaPTEImmediateFlip;
+ double TimeForFetchingRowInVBlankImmediateFlip;
+ double ImmediateFlipBW;
+
+ if (GPUVMEnable == true && HostVMEnable == true)
+ HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ else
+ HostVMDynamicLevelsTrips = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
+ dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+#endif
+
+ if (TotImmediateFlipBytes > 0) {
+ if (use_one_row_for_frame_flip) {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
+ BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ } else {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
+ BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ }
+ if (GPUVMEnable == true) {
+ TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
+ HostVMInefficiencyFactor / ImmediateFlipBW,
+ UrgentExtraLatency + UrgentLatency *
+ (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ LineTime / 4.0);
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ }
+ if ((GPUVMEnable == true || DCCEnable == true)) {
+ TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
+ UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
+ } else {
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ }
+
+ *DestinationLinesToRequestVMInImmediateFlip =
+ dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
+ *DestinationLinesToRequestRowInImmediateFlip =
+ dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
+
+ if (GPUVMEnable == true) {
+ *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
+ (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
+ } else if ((GPUVMEnable == true || DCCEnable == true)) {
+ *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
+ (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+ } else {
+ *final_flip_bw = 0;
+ }
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ *DestinationLinesToRequestVMInImmediateFlip = 0;
+ *DestinationLinesToRequestRowInImmediateFlip = 0;
+ *final_flip_bw = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
+ if (GPUVMEnable == true && DCCEnable != true) {
+ min_row_time = dml_min(dpte_row_height *
+ LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
+ } else if (GPUVMEnable != true && DCCEnable == true) {
+ min_row_time = dml_min(meta_row_height *
+ LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
+ } else {
+ min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
+ LineTime / VRatio, dpte_row_height_chroma * LineTime /
+ VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
+ }
+ } else {
+ if (GPUVMEnable == true && DCCEnable != true) {
+ min_row_time = dpte_row_height * LineTime / VRatio;
+ } else if (GPUVMEnable != true && DCCEnable == true) {
+ min_row_time = meta_row_height * LineTime / VRatio;
+ } else {
+ min_row_time =
+ dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
+ }
+ }
+
+ if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
+ || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
+ > min_row_time) {
+ *ImmediateFlipSupportedForPipe = false;
+ } else {
+ *ImmediateFlipSupportedForPipe = true;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
+ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
+ __func__, *DestinationLinesToRequestVMInImmediateFlip);
+ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
+ __func__, *DestinationLinesToRequestRowInImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
+ __func__, TimeForFetchingRowInVBlankImmediateFlip);
+ dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
+ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
+#endif
+} // CalculateFlipSchedule
+
+void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ bool USRRetrainingRequiredFinal,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ unsigned int PrefetchMode,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int MaxLineBufferLines,
+ unsigned int LineBufferSize,
+ unsigned int WritebackInterfaceBufferSize,
+ double DCFCLK,
+ double ReturnBW,
+ bool SynchronizeTimingsFinal,
+ bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ bool DRRDisplay[],
+ unsigned int dpte_group_bytes[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ SOCParametersList mmSOCParameters,
+ unsigned int WritebackChunkSize,
+ double SOCCLK,
+ double DCFClkDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int LBBitPerPixel[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+ double HRatio[],
+ double HRatioChroma[],
+ unsigned int VTaps[],
+ unsigned int VTapsChroma[],
+ double VRatio[],
+ double VRatioChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ unsigned int VActive[],
+ double PixelClock[],
+ unsigned int BlendingAndTiming[],
+ unsigned int DPPPerSurface[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+ double DSTXAfterScaler[],
+ double DSTYAfterScaler[],
+ bool WritebackEnable[],
+ enum source_format_class WritebackPixelFormat[],
+ double WritebackDestinationWidth[],
+ double WritebackDestinationHeight[],
+ double WritebackSourceHeight[],
+ bool UnboundedRequestEnabled,
+ unsigned int CompressedBufferSizeInkByte,
+
+ /* Output */
+ Watermarks *Watermark,
+ enum clock_change_support *DRAMClockChangeSupport,
+ double MaxActiveDRAMClockChangeLatencySupported[],
+ unsigned int SubViewportLinesNeededInMALL[],
+ enum dm_fclock_change_support *FCLKChangeSupport,
+ double *MinActiveFCLKChangeLatencySupported,
+ bool *USRRetrainingSupport,
+ double ActiveDRAMClockChangeLatencyMargin[])
+{
+ unsigned int i, j, k;
+ unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
+ unsigned int DRAMClockChangeSupportNumber = 0;
+ unsigned int LastSurfaceWithoutMargin;
+ unsigned int DRAMClockChangeMethod = 0;
+ bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
+ double MinActiveFCLKChangeMargin = 0.;
+ double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
+ double ActiveClockChangeLatencyHidingY;
+ double ActiveClockChangeLatencyHidingC;
+ double ActiveClockChangeLatencyHiding;
+ double EffectiveDETBufferSizeY;
+ double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
+ double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
+ double TotalPixelBW = 0.0;
+ bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DC__NUM_DPP__MAX];
+ double LinesInDETC[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double WritebackFCLKChangeLatencyMargin;
+ double WritebackLatencyHiding;
+ bool SameTimingForFCLKChange;
+
+ unsigned int TotalActiveWriteback = 0;
+ unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
+ unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
+
+ Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
+ Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
+ + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
+ Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
+ Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
+ Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
+ + 10 / DCFClkDeepSleep;
+ Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
+ + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
+ dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
+ dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
+ dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
+ dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
+ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
+ dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
+ __func__, Watermark->Z8StutterEnterPlusExitWatermark);
+#endif
+
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (WritebackEnable[k] == true)
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+
+ if (TotalActiveWriteback <= 1) {
+ Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
+ } else {
+ Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
+ + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+ if (USRRetrainingRequiredFinal)
+ Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+ if (TotalActiveWriteback <= 1) {
+ Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ + mmSOCParameters.WritebackLatency;
+ Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ + mmSOCParameters.WritebackLatency;
+ } else {
+ Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
+ }
+
+ if (USRRetrainingRequiredFinal)
+ Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+ if (USRRetrainingRequiredFinal)
+ Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
+ + mmSOCParameters.USRRetrainingLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
+ __func__, Watermark->WritebackDRAMClockChangeWatermark);
+ dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
+ dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
+ dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
+ dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
+ SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
+ LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
+
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
+ dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize);
+ dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]);
+ dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]);
+#endif
+
+ EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveDETBufferSizeY = DETBufferSizeY[k];
+
+ if (UnboundedRequestEnabled) {
+ EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+ + CompressedBufferSizeInkByte * 1024
+ * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
+ / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
+ }
+
+ LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+
+ ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
+ - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
+
+ if (NumberOfActiveSurfaces > 1) {
+ ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
+ - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
+ / PixelClock[k] / VRatio[k];
+ }
+
+ if (BytePerPixelDETC[k] > 0) {
+ LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
+ / VRatioChroma[k];
+ ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
+ - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
+ / PixelClock[k];
+ if (NumberOfActiveSurfaces > 1) {
+ ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
+ - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
+ / PixelClock[k] / VRatioChroma[k];
+ }
+ ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
+ ActiveClockChangeLatencyHidingC);
+ } else {
+ ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
+ }
+
+ ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+ - Watermark->DRAMClockChangeWatermark;
+ ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+ - Watermark->FCLKChangeWatermark;
+ USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
+
+ if (WritebackEnable[k]) {
+ WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
+ / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
+ / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
+ if (WritebackPixelFormat[k] == dm_444_64)
+ WritebackLatencyHiding = WritebackLatencyHiding / 2;
+
+ WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
+ - Watermark->WritebackDRAMClockChangeWatermark;
+
+ WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
+ - Watermark->WritebackFCLKChangeWatermark;
+
+ ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
+ WritebackFCLKChangeLatencyMargin);
+ ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
+ WritebackDRAMClockChangeLatencyMargin);
+ }
+ MaxActiveDRAMClockChangeLatencySupported[k] =
+ (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
+ 0 :
+ (ActiveDRAMClockChangeLatencyMargin[k]
+ + mmSOCParameters.DRAMClockChangeLatency);
+ }
+
+ for (i = 0; i < NumberOfActiveSurfaces; ++i) {
+ for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+ if (i == j ||
+ (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
+ (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
+ (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
+ (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
+ HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
+ VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
+ (DRRDisplay[i] || DRRDisplay[j]))) {
+ SynchronizedSurfaces[i][j] = true;
+ } else {
+ SynchronizedSurfaces[i][j] = false;
+ }
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
+ ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
+ FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
+ MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
+ SurfaceWithMinActiveFCLKChangeMargin = k;
+ }
+ }
+
+ *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
+
+ SameTimingForFCLKChange = true;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
+ if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (SameTimingForFCLKChange ||
+ ActiveFCLKChangeLatencyMargin[k] <
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
+ }
+ SameTimingForFCLKChange = false;
+ }
+ }
+
+ if (MinActiveFCLKChangeMargin > 0) {
+ *FCLKChangeSupport = dm_fclock_change_vactive;
+ } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
+ (PrefetchMode <= 1)) {
+ *FCLKChangeSupport = dm_fclock_change_vblank;
+ } else {
+ *FCLKChangeSupport = dm_fclock_change_unsupported;
+ }
+
+ *USRRetrainingSupport = true;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (USRRetrainingLatencyMargin[k] < 0)) {
+ *USRRetrainingSupport = false;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
+ UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
+ UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
+ ActiveDRAMClockChangeLatencyMargin[k] < 0) {
+ if (PrefetchMode > 0) {
+ DRAMClockChangeSupportNumber = 2;
+ } else if (DRAMClockChangeSupportNumber == 0) {
+ DRAMClockChangeSupportNumber = 1;
+ LastSurfaceWithoutMargin = k;
+ } else if (DRAMClockChangeSupportNumber == 1 &&
+ !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
+ DRAMClockChangeSupportNumber = 2;
+ }
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
+ DRAMClockChangeMethod = 1;
+ else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
+ DRAMClockChangeMethod = 2;
+ }
+
+ if (DRAMClockChangeMethod == 0) {
+ if (DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
+ else if (DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ } else if (DRAMClockChangeMethod == 1) {
+ if (DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
+ else if (DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ } else {
+ if (DRAMClockChangeSupportNumber == 0)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
+ else if (DRAMClockChangeSupportNumber == 1)
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ else
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ unsigned int dst_y_pstate;
+ unsigned int src_y_pstate_l;
+ unsigned int src_y_pstate_c;
+ unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
+
+ dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
+ src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
+ src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
+ sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
+dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
+dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
+dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
+dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
+dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
+dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]);
+dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
+#endif
+ SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
+
+ if (BytePerPixelDETC[k] > 0) {
+ src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
+ src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
+ sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
+ SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
+dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
+dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]);
+dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
+#endif
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
+ dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
+ dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
+ __func__, *MinActiveFCLKChangeLatencySupported);
+ dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
+#endif
+} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
+
+double dml32_CalculateWriteBackDISPCLK(
+ enum source_format_class WritebackPixelFormat,
+ double PixelClock,
+ double WritebackHRatio,
+ double WritebackVRatio,
+ unsigned int WritebackHTaps,
+ unsigned int WritebackVTaps,
+ unsigned int WritebackSourceWidth,
+ unsigned int WritebackDestinationWidth,
+ unsigned int HTotal,
+ unsigned int WritebackLineBufferSize,
+ double DISPCLKDPPCLKVCOSpeed)
+{
+ double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
+
+ DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
+ DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
+ DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
+ WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
+ return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
+}
+
+void dml32_CalculateMinAndMaxPrefetchMode(
+ enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
+ unsigned int *MinPrefetchMode,
+ unsigned int *MaxPrefetchMode)
+{
+ if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
+ *MinPrefetchMode = 3;
+ *MaxPrefetchMode = 3;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
+ *MinPrefetchMode = 2;
+ *MaxPrefetchMode = 2;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
+ *MinPrefetchMode = 1;
+ *MaxPrefetchMode = 1;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
+ *MinPrefetchMode = 0;
+ *MaxPrefetchMode = 0;
+ } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
+ *MinPrefetchMode = 0;
+ *MaxPrefetchMode = 3;
+ } else {
+ *MinPrefetchMode = 0;
+ *MaxPrefetchMode = 3;
+ }
+} // CalculateMinAndMaxPrefetchMode
+
+void dml32_CalculatePixelDeliveryTimes(
+ unsigned int NumberOfActiveSurfaces,
+ double VRatio[],
+ double VRatioChroma[],
+ double VRatioPrefetchY[],
+ double VRatioPrefetchC[],
+ unsigned int swath_width_luma_ub[],
+ unsigned int swath_width_chroma_ub[],
+ unsigned int DPPPerSurface[],
+ double HRatio[],
+ double HRatioChroma[],
+ double PixelClock[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int NumberOfCursors[],
+ unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
+ unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int BlockHeight256BytesC[],
+
+ /* Output */
+ double DisplayPipeLineDeliveryTimeLuma[],
+ double DisplayPipeLineDeliveryTimeChroma[],
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeLuma[],
+ double DisplayPipeRequestDeliveryTimeChroma[],
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ double CursorRequestDeliveryTime[],
+ double CursorRequestDeliveryTimePrefetch[])
+{
+ double req_per_swath_ub;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
+ dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
+ dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
+ dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
+ dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
+#endif
+
+ if (VRatio[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLuma[k] =
+ swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChroma[k] = 0;
+ } else {
+ if (VRatioChroma[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChroma[k] =
+ swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChroma[k] =
+ swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+ }
+ }
+
+ if (VRatioPrefetchY[k] <= 1) {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
+ swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
+ swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+ }
+
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (VRatioPrefetchC[k] <= 1) {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
+ DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+ } else {
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
+ swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
+ __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!IsVertical(SourceRotation[k]))
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
+ else
+ req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
+#endif
+
+ DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
+ DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
+ if (BytePerPixelC[k] == 0) {
+ DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+ } else {
+ if (!IsVertical(SourceRotation[k]))
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
+ else
+ req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
+#endif
+ DisplayPipeRequestDeliveryTimeChroma[k] =
+ DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
+ DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
+ DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+ dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
+ __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+#endif
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ unsigned int cursor_req_per_width;
+
+ cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
+ 256.0 / 8.0, 1.0);
+ if (NumberOfCursors[k] > 0) {
+ if (VRatio[k] <= 1) {
+ CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
+ HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
+ PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+ }
+ if (VRatioPrefetchY[k] <= 1) {
+ CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
+ HRatio[k] / PixelClock[k] / cursor_req_per_width;
+ } else {
+ CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
+ PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+ }
+ } else {
+ CursorRequestDeliveryTime[k] = 0;
+ CursorRequestDeliveryTimePrefetch[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
+ __func__, k, NumberOfCursors[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
+ __func__, k, CursorRequestDeliveryTime[k]);
+ dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
+ __func__, k, CursorRequestDeliveryTimePrefetch[k]);
+#endif
+ }
+} // CalculatePixelDeliveryTimes
+
+void dml32_CalculateMetaAndPTETimes(
+ bool use_one_row_for_frame[],
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int MetaChunkSize,
+ unsigned int MinMetaChunkSizeBytes,
+ unsigned int HTotal[],
+ double VRatio[],
+ double VRatioChroma[],
+ double DestinationLinesToRequestRowInVBlank[],
+ double DestinationLinesToRequestRowInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int dpte_row_height[],
+ unsigned int dpte_row_height_chroma[],
+ unsigned int meta_row_width[],
+ unsigned int meta_row_width_chroma[],
+ unsigned int meta_row_height[],
+ unsigned int meta_row_height_chroma[],
+ unsigned int meta_req_width[],
+ unsigned int meta_req_width_chroma[],
+ unsigned int meta_req_height[],
+ unsigned int meta_req_height_chroma[],
+ unsigned int dpte_group_bytes[],
+ unsigned int PTERequestSizeY[],
+ unsigned int PTERequestSizeC[],
+ unsigned int PixelPTEReqWidthY[],
+ unsigned int PixelPTEReqHeightY[],
+ unsigned int PixelPTEReqWidthC[],
+ unsigned int PixelPTEReqHeightC[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+
+ /* Output */
+ double DST_Y_PER_PTE_ROW_NOM_L[],
+ double DST_Y_PER_PTE_ROW_NOM_C[],
+ double DST_Y_PER_META_ROW_NOM_L[],
+ double DST_Y_PER_META_ROW_NOM_C[],
+ double TimePerMetaChunkNominal[],
+ double TimePerChromaMetaChunkNominal[],
+ double TimePerMetaChunkVBlank[],
+ double TimePerChromaMetaChunkVBlank[],
+ double TimePerMetaChunkFlip[],
+ double TimePerChromaMetaChunkFlip[],
+ double time_per_pte_group_nom_luma[],
+ double time_per_pte_group_vblank_luma[],
+ double time_per_pte_group_flip_luma[],
+ double time_per_pte_group_nom_chroma[],
+ double time_per_pte_group_vblank_chroma[],
+ double time_per_pte_group_flip_chroma[])
+{
+ unsigned int meta_chunk_width;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_chunks_per_row_ub;
+ unsigned int meta_chunk_width_chroma;
+ unsigned int min_meta_chunk_width_chroma;
+ unsigned int meta_chunk_per_row_int_chroma;
+ unsigned int meta_row_remainder_chroma;
+ unsigned int meta_chunk_threshold_chroma;
+ unsigned int meta_chunks_per_row_ub_chroma;
+ unsigned int dpte_group_width_luma;
+ unsigned int dpte_groups_per_row_luma_ub;
+ unsigned int dpte_group_width_chroma;
+ unsigned int dpte_groups_per_row_chroma_ub;
+ unsigned int k;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0)
+ DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+ else
+ DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
+ DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+ if (BytePerPixelC[k] == 0)
+ DST_Y_PER_META_ROW_NOM_C[k] = 0;
+ else
+ DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (DCCEnable[k] == true) {
+ meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
+ min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
+ meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
+ meta_row_remainder = meta_row_width[k] % meta_chunk_width;
+ if (!IsVertical(SourceRotation[k]))
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
+ else
+ meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
+
+ if (meta_row_remainder <= meta_chunk_threshold)
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ else
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+
+ TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+ if (BytePerPixelC[k] == 0) {
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ } else {
+ meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
+ meta_row_height_chroma[k];
+ min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
+ meta_row_height_chroma[k];
+ meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
+ meta_chunk_width_chroma;
+ meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
+ if (!IsVertical(SourceRotation[k])) {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
+ meta_req_width_chroma[k];
+ } else {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
+ meta_req_height_chroma[k];
+ }
+ if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+ else
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+
+ TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+ }
+ } else {
+ TimePerMetaChunkNominal[k] = 0;
+ TimePerMetaChunkVBlank[k] = 0;
+ TimePerMetaChunkFlip[k] = 0;
+ TimePerChromaMetaChunkNominal[k] = 0;
+ TimePerChromaMetaChunkVBlank[k] = 0;
+ TimePerChromaMetaChunkFlip[k] = 0;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (GPUVMEnable == true) {
+ if (!IsVertical(SourceRotation[k])) {
+ dpte_group_width_luma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
+ } else {
+ dpte_group_width_luma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
+ }
+
+ if (use_one_row_for_frame[k]) {
+ dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
+ (double) dpte_group_width_luma / 2.0, 1.0);
+ } else {
+ dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
+ (double) dpte_group_width_luma, 1.0);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
+ __func__, k, use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
+ __func__, k, dpte_group_bytes[k]);
+ dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
+ __func__, k, PTERequestSizeY[k]);
+ dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
+ __func__, k, PixelPTEReqWidthY[k]);
+ dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
+ __func__, k, PixelPTEReqHeightY[k]);
+ dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
+ __func__, k, dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
+ __func__, k, dpte_group_width_luma);
+ dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
+ __func__, k, dpte_groups_per_row_luma_ub);
+#endif
+
+ time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+ if (BytePerPixelC[k] == 0) {
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ } else {
+ if (!IsVertical(SourceRotation[k])) {
+ dpte_group_width_chroma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
+ } else {
+ dpte_group_width_chroma = (double) dpte_group_bytes[k] /
+ (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
+ }
+
+ if (use_one_row_for_frame[k]) {
+ dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
+ (double) dpte_group_width_chroma / 2.0, 1.0);
+ } else {
+ dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
+ (double) dpte_group_width_chroma, 1.0);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
+ __func__, k, dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
+ __func__, k, dpte_group_width_chroma);
+ dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
+ __func__, k, dpte_groups_per_row_chroma_ub);
+#endif
+ time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+ }
+ } else {
+ time_per_pte_group_nom_luma[k] = 0;
+ time_per_pte_group_vblank_luma[k] = 0;
+ time_per_pte_group_flip_luma[k] = 0;
+ time_per_pte_group_nom_chroma[k] = 0;
+ time_per_pte_group_vblank_chroma[k] = 0;
+ time_per_pte_group_flip_chroma[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
+ __func__, k, DestinationLinesToRequestRowInVBlank[k]);
+ dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
+ __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
+ __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
+ __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
+ __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
+ dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
+ __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
+ __func__, k, TimePerMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
+ __func__, k, TimePerMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
+ __func__, k, TimePerMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
+ __func__, k, TimePerChromaMetaChunkNominal[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
+ __func__, k, TimePerChromaMetaChunkVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
+ __func__, k, TimePerChromaMetaChunkFlip[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
+ __func__, k, time_per_pte_group_nom_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
+ __func__, k, time_per_pte_group_vblank_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
+ __func__, k, time_per_pte_group_flip_luma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
+ __func__, k, time_per_pte_group_nom_chroma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
+ __func__, k, time_per_pte_group_vblank_chroma[k]);
+ dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
+ __func__, k, time_per_pte_group_flip_chroma[k]);
+#endif
+ }
+} // CalculateMetaAndPTETimes
+
+void dml32_CalculateVMGroupAndRequestTimes(
+ unsigned int NumberOfActiveSurfaces,
+ bool GPUVMEnable,
+ unsigned int GPUVMMaxPageTableLevels,
+ unsigned int HTotal[],
+ unsigned int BytePerPixelC[],
+ double DestinationLinesToRequestVMInVBlank[],
+ double DestinationLinesToRequestVMInImmediateFlip[],
+ bool DCCEnable[],
+ double PixelClock[],
+ unsigned int dpte_row_width_luma_ub[],
+ unsigned int dpte_row_width_chroma_ub[],
+ unsigned int vm_group_bytes[],
+ unsigned int dpde0_bytes_per_frame_ub_l[],
+ unsigned int dpde0_bytes_per_frame_ub_c[],
+ unsigned int meta_pte_bytes_per_frame_ub_l[],
+ unsigned int meta_pte_bytes_per_frame_ub_c[],
+
+ /* Output */
+ double TimePerVMGroupVBlank[],
+ double TimePerVMGroupFlip[],
+ double TimePerVMRequestVBlank[],
+ double TimePerVMRequestFlip[])
+{
+ unsigned int k;
+ unsigned int num_group_per_lower_vm_stage;
+ unsigned int num_req_per_lower_vm_stage;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
+ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
+#endif
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
+ dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
+ dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
+ __func__, k, dpde0_bytes_per_frame_ub_l[k]);
+ dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
+ __func__, k, dpde0_bytes_per_frame_ub_c[k]);
+ dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
+ __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+ dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
+ __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+ if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0) +
+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0) +
+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ } else {
+ num_group_per_lower_vm_stage = dml_ceil(
+ (double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1.0);
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_group_per_lower_vm_stage = 2 + dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1) +
+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1) +
+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1) +
+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
+ (double) (vm_group_bytes[k]), 1);
+ } else {
+ num_group_per_lower_vm_stage = 1 + dml_ceil(
+ (double) (dpde0_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1) + dml_ceil(
+ (double) (meta_pte_bytes_per_frame_ub_l[k]) /
+ (double) (vm_group_bytes[k]), 1);
+ }
+ }
+ }
+
+ if (DCCEnable[k] == false) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
+ dpde0_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (GPUVMMaxPageTableLevels == 1) {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
+ meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ } else {
+ if (BytePerPixelC[k] > 0) {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
+ 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
+ meta_pte_bytes_per_frame_ub_l[k] / 64 +
+ meta_pte_bytes_per_frame_ub_c[k] / 64;
+ } else {
+ num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
+ 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
+ }
+ }
+ }
+
+ TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
+ HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
+ TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
+ HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+ TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
+ HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
+
+ if (GPUVMMaxPageTableLevels > 2) {
+ TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
+ TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
+ TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
+ TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
+ }
+
+ } else {
+ TimePerVMGroupVBlank[k] = 0;
+ TimePerVMGroupFlip[k] = 0;
+ TimePerVMRequestVBlank[k] = 0;
+ TimePerVMRequestFlip[k] = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
+ dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
+ dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
+#endif
+ }
+} // CalculateVMGroupAndRequestTimes
+
+void dml32_CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ unsigned int nomDETInKByte,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dm_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum dm_rotation_angle SourceRotation,
+ /* Output */
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma)
+{
+
+ enum RequestType RequestLuma;
+ enum RequestType RequestChroma;
+
+ unsigned int segment_order_horz_contiguous_luma;
+ unsigned int segment_order_horz_contiguous_chroma;
+ unsigned int segment_order_vert_contiguous_luma;
+ unsigned int segment_order_vert_contiguous_chroma;
+ unsigned int req128_horz_wc_l;
+ unsigned int req128_horz_wc_c;
+ unsigned int req128_vert_wc_l;
+ unsigned int req128_vert_wc_c;
+ unsigned int MAS_vp_horz_limit;
+ unsigned int MAS_vp_vert_limit;
+ unsigned int max_vp_horz_width;
+ unsigned int max_vp_vert_height;
+ unsigned int eff_surf_width_l;
+ unsigned int eff_surf_width_c;
+ unsigned int eff_surf_height_l;
+ unsigned int eff_surf_height_c;
+ unsigned int full_swath_bytes_horz_wc_l;
+ unsigned int full_swath_bytes_horz_wc_c;
+ unsigned int full_swath_bytes_vert_wc_l;
+ unsigned int full_swath_bytes_vert_wc_c;
+ unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
+
+ unsigned int yuv420;
+ unsigned int horz_div_l;
+ unsigned int horz_div_c;
+ unsigned int vert_div_l;
+ unsigned int vert_div_c;
+
+ unsigned int swath_buf_size;
+ double detile_buf_vp_horz_limit;
+ double detile_buf_vp_vert_limit;
+
+ typedef enum {
+ REQ_256Bytes,
+ REQ_128BytesNonContiguous,
+ REQ_128BytesContiguous,
+ REQ_NA
+ } RequestType;
+
+ yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
+ SourcePixelFormat == dm_420_12) ? 1 : 0);
+ horz_div_l = 1;
+ horz_div_c = 1;
+ vert_div_l = 1;
+ vert_div_c = 1;
+
+ if (BytePerPixelY == 1)
+ vert_div_l = 0;
+ if (BytePerPixelC == 1)
+ vert_div_c = 0;
+
+ if (BytePerPixelC == 0) {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
+ BytePerPixelY / (1 + horz_div_l));
+ detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
+ (1 + vert_div_l));
+ } else {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
+ detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
+ BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
+ BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+ detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
+ (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
+ (1 + vert_div_c) / (1 + yuv420));
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+ detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+ }
+
+ detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
+ detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
+
+ MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
+ MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
+ max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
+ max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
+ eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
+ eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+ eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
+ eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+
+ full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+ full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+ if (BytePerPixelC > 0) {
+ full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+ full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+ } else {
+ full_swath_bytes_horz_wc_c = 0;
+ full_swath_bytes_vert_wc_c = 0;
+ }
+
+ if (SourcePixelFormat == dm_420_10) {
+ full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
+ full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
+ full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
+ full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
+ }
+
+ if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 0;
+ } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
+ full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 1;
+ } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
+ full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 0;
+ } else {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 1;
+ }
+
+ if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 0;
+ } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
+ full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 1;
+ } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
+ full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 0;
+ } else {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 1;
+ }
+
+ if (BytePerPixelY == 2) {
+ segment_order_horz_contiguous_luma = 0;
+ segment_order_vert_contiguous_luma = 1;
+ } else {
+ segment_order_horz_contiguous_luma = 1;
+ segment_order_vert_contiguous_luma = 0;
+ }
+
+ if (BytePerPixelC == 2) {
+ segment_order_horz_contiguous_chroma = 0;
+ segment_order_vert_contiguous_chroma = 1;
+ } else {
+ segment_order_horz_contiguous_chroma = 1;
+ segment_order_vert_contiguous_chroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
+ dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
+ dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
+ dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
+ dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
+ dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
+ dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
+ dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
+ __func__, segment_order_horz_contiguous_chroma);
+#endif
+
+ if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+ if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
+ RequestLuma = REQ_256Bytes;
+ else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
+ (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
+ RequestLuma = REQ_128BytesNonContiguous;
+ else
+ RequestLuma = REQ_128BytesContiguous;
+
+ if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
+ RequestChroma = REQ_256Bytes;
+ else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
+ (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
+ RequestChroma = REQ_128BytesNonContiguous;
+ else
+ RequestChroma = REQ_128BytesContiguous;
+
+ } else if (!IsVertical(SourceRotation)) {
+ if (req128_horz_wc_l == 0)
+ RequestLuma = REQ_256Bytes;
+ else if (segment_order_horz_contiguous_luma == 0)
+ RequestLuma = REQ_128BytesNonContiguous;
+ else
+ RequestLuma = REQ_128BytesContiguous;
+
+ if (req128_horz_wc_c == 0)
+ RequestChroma = REQ_256Bytes;
+ else if (segment_order_horz_contiguous_chroma == 0)
+ RequestChroma = REQ_128BytesNonContiguous;
+ else
+ RequestChroma = REQ_128BytesContiguous;
+
+ } else {
+ if (req128_vert_wc_l == 0)
+ RequestLuma = REQ_256Bytes;
+ else if (segment_order_vert_contiguous_luma == 0)
+ RequestLuma = REQ_128BytesNonContiguous;
+ else
+ RequestLuma = REQ_128BytesContiguous;
+
+ if (req128_vert_wc_c == 0)
+ RequestChroma = REQ_256Bytes;
+ else if (segment_order_vert_contiguous_chroma == 0)
+ RequestChroma = REQ_128BytesNonContiguous;
+ else
+ RequestChroma = REQ_128BytesContiguous;
+ }
+
+ if (RequestLuma == (enum RequestType) REQ_256Bytes) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 256;
+ *IndependentBlockLuma = 0;
+ } else if (RequestLuma == (enum RequestType) REQ_128BytesContiguous) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 128;
+ *IndependentBlockLuma = 128;
+ } else {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 64;
+ *IndependentBlockLuma = 64;
+ }
+
+ if (RequestChroma == (enum RequestType) REQ_256Bytes) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 256;
+ *IndependentBlockChroma = 0;
+ } else if (RequestChroma == (enum RequestType) REQ_128BytesContiguous) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 128;
+ *IndependentBlockChroma = 128;
+ } else {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 64;
+ *IndependentBlockChroma = 64;
+ }
+
+ if (DCCEnabled != true || BytePerPixelC == 0) {
+ *MaxUncompressedBlockChroma = 0;
+ *MaxCompressedBlockChroma = 0;
+ *IndependentBlockChroma = 0;
+ }
+
+ if (DCCEnabled != true) {
+ *MaxUncompressedBlockLuma = 0;
+ *MaxCompressedBlockLuma = 0;
+ *IndependentBlockLuma = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
+ dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
+ dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
+ dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
+ dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
+ dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
+#endif
+
+} // CalculateDCCConfiguration
+
+void dml32_CalculateStutterEfficiency(
+ unsigned int CompressedBufferSizeInkByte,
+ enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ bool UnboundedRequestEnabled,
+ unsigned int MetaFIFOSizeInKEntries,
+ unsigned int ZeroSizeBufferEntries,
+ unsigned int PixelChunkSizeInKByte,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int ROBBufferSizeInKByte,
+ double TotalDataReadBandwidth,
+ double DCFCLK,
+ double ReturnBW,
+ unsigned int CompbufReservedSpace64B,
+ unsigned int CompbufReservedSpaceZs,
+ double SRExitTime,
+ double SRExitZ8Time,
+ bool SynchronizeTimingsFinal,
+ unsigned int BlendingAndTiming[],
+ double StutterEnterPlusExitWatermark,
+ double Z8StutterEnterPlusExitWatermark,
+ bool ProgressiveToInterlaceUnitInOPP,
+ bool Interlace[],
+ double MinTTUVBlank[],
+ unsigned int DPPPerSurface[],
+ unsigned int DETBufferSizeY[],
+ unsigned int BytePerPixelY[],
+ double BytePerPixelDETY[],
+ double SwathWidthY[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ double NetDCCRateLuma[],
+ double NetDCCRateChroma[],
+ double DCCFractionOfZeroSizeRequestsLuma[],
+ double DCCFractionOfZeroSizeRequestsChroma[],
+ unsigned int HTotal[],
+ unsigned int VTotal[],
+ double PixelClock[],
+ double VRatio[],
+ enum dm_rotation_angle SourceRotation[],
+ unsigned int BlockHeight256BytesY[],
+ unsigned int BlockWidth256BytesY[],
+ unsigned int BlockHeight256BytesC[],
+ unsigned int BlockWidth256BytesC[],
+ unsigned int DCCYMaxUncompressedBlock[],
+ unsigned int DCCCMaxUncompressedBlock[],
+ unsigned int VActive[],
+ bool DCCEnable[],
+ bool WritebackEnable[],
+ double ReadBandwidthSurfaceLuma[],
+ double ReadBandwidthSurfaceChroma[],
+ double meta_row_bw[],
+ double dpte_row_bw[],
+
+ /* Output */
+ double *StutterEfficiencyNotIncludingVBlank,
+ double *StutterEfficiency,
+ unsigned int *NumberOfStutterBurstsPerFrame,
+ double *Z8StutterEfficiencyNotIncludingVBlank,
+ double *Z8StutterEfficiency,
+ unsigned int *Z8NumberOfStutterBurstsPerFrame,
+ double *StutterPeriod,
+ bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
+{
+
+ bool FoundCriticalSurface = false;
+ unsigned int SwathSizeCriticalSurface = 0;
+ unsigned int LastChunkOfSwathSize;
+ unsigned int MissingPartOfLastSwathOfDETSize;
+ double LastZ8StutterPeriod = 0.0;
+ double LastStutterPeriod = 0.0;
+ unsigned int TotalNumberOfActiveOTG = 0;
+ double doublePixelClock;
+ unsigned int doubleHTotal;
+ unsigned int doubleVTotal;
+ bool SameTiming = true;
+ double DETBufferingTimeY;
+ double SwathWidthYCriticalSurface = 0.0;
+ double SwathHeightYCriticalSurface = 0.0;
+ double VActiveTimeCriticalSurface = 0.0;
+ double FrameTimeCriticalSurface = 0.0;
+ unsigned int BytePerPixelYCriticalSurface = 0;
+ double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
+ unsigned int DETBufferSizeYCriticalSurface = 0;
+ double MinTTUVBlankCriticalSurface = 0.0;
+ unsigned int BlockWidth256BytesYCriticalSurface = 0;
+ bool doublePlaneCriticalSurface = 0;
+ bool doublePipeCriticalSurface = 0;
+ double TotalCompressedReadBandwidth;
+ double TotalRowReadBandwidth;
+ double AverageDCCCompressionRate;
+ double EffectiveCompressedBufferSize;
+ double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
+ double StutterBurstTime;
+ unsigned int TotalActiveWriteback;
+ double LinesInDETY;
+ double LinesInDETYRoundedDownToSwath;
+ double MaximumEffectiveCompressionLuma;
+ double MaximumEffectiveCompressionChroma;
+ double TotalZeroSizeRequestReadBandwidth;
+ double TotalZeroSizeCompressedReadBandwidth;
+ double AverageDCCZeroSizeFraction;
+ double AverageZeroSizeCompressionRate;
+ unsigned int k;
+
+ TotalZeroSizeRequestReadBandwidth = 0;
+ TotalZeroSizeCompressedReadBandwidth = 0;
+ TotalRowReadBandwidth = 0;
+ TotalCompressedReadBandwidth = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ if (DCCEnable[k] == true) {
+ if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
+ || (!IsVertical(SourceRotation[k])
+ && BlockHeight256BytesY[k] > SwathHeightY[k])
+ || DCCYMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionLuma = 2;
+ } else {
+ MaximumEffectiveCompressionLuma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k]
+ / dml_min(NetDCCRateLuma[k],
+ MaximumEffectiveCompressionLuma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
+ __func__, k, NetDCCRateLuma[k]);
+ dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
+ __func__, k, MaximumEffectiveCompressionLuma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
+ / MaximumEffectiveCompressionLuma;
+
+ if (ReadBandwidthSurfaceChroma[k] > 0) {
+ if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
+ || (!IsVertical(SourceRotation[k])
+ && BlockHeight256BytesC[k] > SwathHeightC[k])
+ || DCCCMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionChroma = 2;
+ } else {
+ MaximumEffectiveCompressionChroma = 4;
+ }
+ TotalCompressedReadBandwidth =
+ TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ / dml_min(NetDCCRateChroma[k],
+ MaximumEffectiveCompressionChroma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
+ __func__, k, ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
+ __func__, k, NetDCCRateChroma[k]);
+ dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
+ __func__, k, MaximumEffectiveCompressionChroma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ * DCCFractionOfZeroSizeRequestsChroma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
+ + ReadBandwidthSurfaceChroma[k]
+ * DCCFractionOfZeroSizeRequestsChroma[k]
+ / MaximumEffectiveCompressionChroma;
+ }
+ } else {
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
+ + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
+ }
+ TotalRowReadBandwidth = TotalRowReadBandwidth
+ + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
+ }
+ }
+
+ AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+ AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
+ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
+ __func__, TotalZeroSizeCompressedReadBandwidth);
+ dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+ dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+ dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
+ dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
+#endif
+ if (AverageDCCZeroSizeFraction == 1) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
+ / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
+ * AverageZeroSizeCompressionRate
+ + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
+ * AverageZeroSizeCompressionRate;
+ } else if (AverageDCCZeroSizeFraction > 0) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
+ / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = dml_min(
+ (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (double) MetaFIFOSizeInKEntries * 1024 * 64
+ / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
+ + 1 / AverageDCCCompressionRate))
+ + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
+ * AverageDCCCompressionRate,
+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
+ / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__,
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
+ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
+ AverageDCCCompressionRate));
+ dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
+ CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
+ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+#endif
+ } else {
+ EffectiveCompressedBufferSize = dml_min(
+ (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
+ + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
+ * AverageDCCCompressionRate;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__,
+ CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__,
+ MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+#endif
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
+ dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+ *StutterPeriod = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ LinesInDETY = ((double) DETBufferSizeY[k]
+ + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
+ * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
+ / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
+ DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
+ / VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
+ dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
+ dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
+ __func__, k, ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
+ dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
+ __func__, k, LinesInDETYRoundedDownToSwath);
+ dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
+ dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+ dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
+ dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+ dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
+#endif
+
+ if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
+ bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
+
+ FoundCriticalSurface = true;
+ *StutterPeriod = DETBufferingTimeY;
+ FrameTimeCriticalSurface = (
+ isInterlaceTiming ?
+ dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
+ * (double) HTotal[k] / PixelClock[k];
+ VActiveTimeCriticalSurface = (
+ isInterlaceTiming ?
+ dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
+ * (double) HTotal[k] / PixelClock[k];
+ BytePerPixelYCriticalSurface = BytePerPixelY[k];
+ SwathWidthYCriticalSurface = SwathWidthY[k];
+ SwathHeightYCriticalSurface = SwathHeightY[k];
+ BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
+ LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
+ - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+ DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
+ MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
+ doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
+ doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
+ __func__, k, FoundCriticalSurface);
+ dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
+ __func__, k, *StutterPeriod);
+ dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
+ __func__, k, MinTTUVBlankCriticalSurface);
+ dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
+ __func__, k, FrameTimeCriticalSurface);
+ dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
+ __func__, k, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
+ __func__, k, BytePerPixelYCriticalSurface);
+ dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
+ __func__, k, SwathWidthYCriticalSurface);
+ dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
+ __func__, k, SwathHeightYCriticalSurface);
+ dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
+ __func__, k, BlockWidth256BytesYCriticalSurface);
+ dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
+ __func__, k, doublePlaneCriticalSurface);
+ dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
+ __func__, k, doublePipeCriticalSurface);
+ dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
+ __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
+#endif
+ }
+ }
+ }
+
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
+ EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
+ __func__, *StutterPeriod * TotalDataReadBandwidth);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+ dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+ dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
+ dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+#endif
+
+ StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
+ / ReturnBW
+ + (*StutterPeriod * TotalDataReadBandwidth
+ - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
+ + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
+ AverageDCCCompressionRate / ReturnBW);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
+ __func__, (*StutterPeriod * TotalDataReadBandwidth));
+ dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
+ dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+ StutterBurstTime = dml_max(StutterBurstTime,
+ LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
+ * SwathWidthYCriticalSurface / ReturnBW);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Time to finish residue swath=%f\n",
+ __func__,
+ LinesToFinishSwathTransferStutterCriticalSurface *
+ BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
+#endif
+
+ TotalActiveWriteback = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (WritebackEnable[k])
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+
+ if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
+ dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
+ dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+#endif
+ *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
+ 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
+ *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
+ 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
+ *NumberOfStutterBurstsPerFrame = (
+ *StutterEfficiencyNotIncludingVBlank > 0 ?
+ dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
+ *Z8NumberOfStutterBurstsPerFrame = (
+ *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
+ dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
+ } else {
+ *StutterEfficiencyNotIncludingVBlank = 0.;
+ *Z8StutterEfficiencyNotIncludingVBlank = 0.;
+ *NumberOfStutterBurstsPerFrame = 0;
+ *Z8NumberOfStutterBurstsPerFrame = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *Z8StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ if (BlendingAndTiming[k] == k) {
+ if (TotalNumberOfActiveOTG == 0) {
+ doublePixelClock = PixelClock[k];
+ doubleHTotal = HTotal[k];
+ doubleVTotal = VTotal[k];
+ } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
+ || doubleVTotal != VTotal[k]) {
+ SameTiming = false;
+ }
+ TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+ }
+ }
+ }
+
+ if (*StutterEfficiencyNotIncludingVBlank > 0) {
+ LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+
+ if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
+ && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
+ *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
+ + StutterBurstTime * VActiveTimeCriticalSurface
+ / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *StutterEfficiency = 0;
+ }
+
+ if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
+ LastZ8StutterPeriod = VActiveTimeCriticalSurface
+ - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
+ if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
+ MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
+ *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
+ * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *Z8StutterEfficiency = 0.;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
+ dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
+ dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
+ __func__, *StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
+ * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
+ LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
+ MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
+ - DETBufferSizeYCriticalSurface;
+
+ *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
+ && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
+ && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
+ && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
+ dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
+ dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
+ dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+#endif
+} // CalculateStutterEfficiency
+
+void dml32_CalculateMaxDETAndMinCompressedBufferSize(
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int ROBBufferSizeInKByte,
+ unsigned int MaxNumDPP,
+ bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+ unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
+
+ /* Output */
+ unsigned int *MaxTotalDETInKByte,
+ unsigned int *nomDETInKByte,
+ unsigned int *MinCompressedBufferSizeInKByte)
+{
+ bool det_buff_size_override_en = nomDETInKByteOverrideEnable;
+ unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue;
+
+ *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte +
+ (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64);
+ *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
+ *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
+ dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
+ dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
+ dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
+ dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
+#endif
+
+ if (det_buff_size_override_en) {
+ *nomDETInKByte = det_buff_size_override_val;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
+#endif
+ }
+} // CalculateMaxDETAndMinCompressedBufferSize
+
+bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[])
+{
+ unsigned int k;
+ bool NotEnoughUrgentLatencyHiding = false;
+ bool CalculateVActiveBandwithSupport_val = false;
+ double VActiveBandwith = 0;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (NotUrgentLatencyHiding[k]) {
+ NotEnoughUrgentLatencyHiding = true;
+ }
+ }
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k];
+ }
+
+ CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
+
+#ifdef __DML_VBA_DEBUG__
+dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding);
+dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
+dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val);
+#endif
+ return CalculateVActiveBandwithSupport_val;
+}
+
+void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ bool NotUrgentLatencyHiding[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *PrefetchBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *PrefetchBandwidthSupport)
+{
+ unsigned int k;
+ bool NotEnoughUrgentLatencyHiding = false;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (NotUrgentLatencyHiding[k]) {
+ NotEnoughUrgentLatencyHiding = true;
+ }
+ }
+
+ *PrefetchBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+
+ *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
+ *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
+}
+
+double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double cursor_bw_pre[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[])
+{
+ unsigned int k;
+ double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW;
+
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+
+ return CalculateBandwidthAvailableForImmediateFlip_val;
+}
+
+void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ enum immediate_flip_requirement ImmediateFlipRequirement[],
+ double final_flip_bw[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ double PrefetchBandwidthLuma[],
+ double PrefetchBandwidthChroma[],
+ double cursor_bw[],
+ double meta_row_bandwidth[],
+ double dpte_row_bandwidth[],
+ double cursor_bw_pre[],
+ double prefetch_vmrow_bw[],
+ unsigned int NumberOfDPP[],
+ double UrgentBurstFactorLuma[],
+ double UrgentBurstFactorChroma[],
+ double UrgentBurstFactorCursor[],
+ double UrgentBurstFactorLumaPre[],
+ double UrgentBurstFactorChromaPre[],
+ double UrgentBurstFactorCursorPre[],
+
+ /* output */
+ double *TotalBandwidth,
+ double *FractionOfUrgentBandwidth,
+ bool *ImmediateFlipBandwidthSupport)
+{
+ unsigned int k;
+ *TotalBandwidth = 0;
+ for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
+ *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ } else {
+ *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
+ NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
+ }
+ }
+ *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
+ *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
+}